{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "initial_id",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:41:27.808469700Z",
     "start_time": "2024-03-12T12:41:26.931952Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import warnings\n",
    "\n",
    "from matplotlib.ticker import PercentFormatter\n",
    "\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "%matplotlib inline\n",
    "\n",
    "plt.rcParams['font.sans-serif'] = ['SimHei']\n",
    "plt.rcParams['axes.unicode_minus'] = False"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "426ff2d7a6884889",
   "metadata": {},
   "source": [
    "# 整理clicks orders users"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "33654f04a5faacd3",
   "metadata": {},
   "source": [
    "## clicks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ab4c30d25ce4b210",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:41:36.041614700Z",
     "start_time": "2024-03-12T12:41:27.808469700Z"
    }
   },
   "outputs": [],
   "source": [
    "# 'clicks' table   点击数据\n",
    "clicks = pd.read_csv('./JD_data/JD_click_data.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "3d93a4adf558a06",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:41:36.057209100Z",
     "start_time": "2024-03-12T12:41:36.042120700Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>user_ID</th>\n",
       "      <th>request_time</th>\n",
       "      <th>channel</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>a234e08c57</td>\n",
       "      <td>4c3d6d10c2</td>\n",
       "      <td>2018-03-01 23:57:53</td>\n",
       "      <td>wechat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6449e1fd87</td>\n",
       "      <td>-</td>\n",
       "      <td>2018-03-01 16:13:48</td>\n",
       "      <td>wechat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>2791ec4485</td>\n",
       "      <td>2018-03-01 22:10:51</td>\n",
       "      <td>wechat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>eb0718c1c9</td>\n",
       "      <td>2018-03-01 16:34:08</td>\n",
       "      <td>wechat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>59f84cf342</td>\n",
       "      <td>2018-03-01 22:20:35</td>\n",
       "      <td>wechat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20214510</th>\n",
       "      <td>a8a96e022a</td>\n",
       "      <td>-</td>\n",
       "      <td>2018-03-31 21:45:07</td>\n",
       "      <td>others</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20214511</th>\n",
       "      <td>eb3f2d2fd8</td>\n",
       "      <td>-</td>\n",
       "      <td>2018-03-31 11:31:11</td>\n",
       "      <td>others</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20214512</th>\n",
       "      <td>fbce41fd82</td>\n",
       "      <td>-</td>\n",
       "      <td>2018-03-31 11:31:08</td>\n",
       "      <td>others</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20214513</th>\n",
       "      <td>fbce41fd82</td>\n",
       "      <td>-</td>\n",
       "      <td>2018-03-31 19:28:25</td>\n",
       "      <td>others</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20214514</th>\n",
       "      <td>87b853b910</td>\n",
       "      <td>-</td>\n",
       "      <td>2018-03-31 06:29:47</td>\n",
       "      <td>others</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>20214515 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              sku_ID     user_ID         request_time channel\n",
       "0         a234e08c57  4c3d6d10c2  2018-03-01 23:57:53  wechat\n",
       "1         6449e1fd87           -  2018-03-01 16:13:48  wechat\n",
       "2         09b70fcd83  2791ec4485  2018-03-01 22:10:51  wechat\n",
       "3         09b70fcd83  eb0718c1c9  2018-03-01 16:34:08  wechat\n",
       "4         09b70fcd83  59f84cf342  2018-03-01 22:20:35  wechat\n",
       "...              ...         ...                  ...     ...\n",
       "20214510  a8a96e022a           -  2018-03-31 21:45:07  others\n",
       "20214511  eb3f2d2fd8           -  2018-03-31 11:31:11  others\n",
       "20214512  fbce41fd82           -  2018-03-31 11:31:08  others\n",
       "20214513  fbce41fd82           -  2018-03-31 19:28:25  others\n",
       "20214514  87b853b910           -  2018-03-31 06:29:47  others\n",
       "\n",
       "[20214515 rows x 4 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clicks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "420e9710bc3f9061",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:41:37.788745Z",
     "start_time": "2024-03-12T12:41:36.059199500Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>user_ID</th>\n",
       "      <th>request_time</th>\n",
       "      <th>channel</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>a234e08c57</td>\n",
       "      <td>4c3d6d10c2</td>\n",
       "      <td>2018-03-01 23:57:53</td>\n",
       "      <td>wechat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6449e1fd87</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-01 16:13:48</td>\n",
       "      <td>wechat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>2791ec4485</td>\n",
       "      <td>2018-03-01 22:10:51</td>\n",
       "      <td>wechat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>eb0718c1c9</td>\n",
       "      <td>2018-03-01 16:34:08</td>\n",
       "      <td>wechat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>59f84cf342</td>\n",
       "      <td>2018-03-01 22:20:35</td>\n",
       "      <td>wechat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20214510</th>\n",
       "      <td>a8a96e022a</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 21:45:07</td>\n",
       "      <td>others</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20214511</th>\n",
       "      <td>eb3f2d2fd8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 11:31:11</td>\n",
       "      <td>others</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20214512</th>\n",
       "      <td>fbce41fd82</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 11:31:08</td>\n",
       "      <td>others</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20214513</th>\n",
       "      <td>fbce41fd82</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 19:28:25</td>\n",
       "      <td>others</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20214514</th>\n",
       "      <td>87b853b910</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 06:29:47</td>\n",
       "      <td>others</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>20214515 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              sku_ID     user_ID         request_time channel\n",
       "0         a234e08c57  4c3d6d10c2  2018-03-01 23:57:53  wechat\n",
       "1         6449e1fd87         NaN  2018-03-01 16:13:48  wechat\n",
       "2         09b70fcd83  2791ec4485  2018-03-01 22:10:51  wechat\n",
       "3         09b70fcd83  eb0718c1c9  2018-03-01 16:34:08  wechat\n",
       "4         09b70fcd83  59f84cf342  2018-03-01 22:20:35  wechat\n",
       "...              ...         ...                  ...     ...\n",
       "20214510  a8a96e022a         NaN  2018-03-31 21:45:07  others\n",
       "20214511  eb3f2d2fd8         NaN  2018-03-31 11:31:11  others\n",
       "20214512  fbce41fd82         NaN  2018-03-31 11:31:08  others\n",
       "20214513  fbce41fd82         NaN  2018-03-31 19:28:25  others\n",
       "20214514  87b853b910         NaN  2018-03-31 06:29:47  others\n",
       "\n",
       "[20214515 rows x 4 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clicks['sku_ID'] = clicks['sku_ID'].replace('-', np.nan)\n",
    "clicks['user_ID'] = clicks['user_ID'].replace('-', np.nan)\n",
    "clicks['request_time'] = clicks['request_time'].replace('-', np.nan)\n",
    "clicks"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2c2ebea3f053474",
   "metadata": {},
   "source": [
    "## orders"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "f9a0e73bb6a1d3eb",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:41:38.453561500Z",
     "start_time": "2024-03-12T12:41:37.789745800Z"
    }
   },
   "outputs": [],
   "source": [
    "# 'orders' table    订单数据\n",
    "orders = pd.read_csv('./JD_data/JD_order_data.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "afb6a8ffb36e35c7",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:41:38.469639600Z",
     "start_time": "2024-03-12T12:41:38.455566100Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_ID</th>\n",
       "      <th>user_ID</th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>order_date</th>\n",
       "      <th>order_time</th>\n",
       "      <th>quantity</th>\n",
       "      <th>type</th>\n",
       "      <th>promise</th>\n",
       "      <th>original_unit_price</th>\n",
       "      <th>final_unit_price</th>\n",
       "      <th>direct_discount_per_unit</th>\n",
       "      <th>quantity_discount_per_unit</th>\n",
       "      <th>bundle_discount_per_unit</th>\n",
       "      <th>coupon_discount_per_unit</th>\n",
       "      <th>gift_item</th>\n",
       "      <th>dc_ori</th>\n",
       "      <th>dc_des</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>d0cf5cc6db</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 17:14:25.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>-</td>\n",
       "      <td>89.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7444318d01</td>\n",
       "      <td>33a9e56257</td>\n",
       "      <td>067b673f2b</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 11:10:40.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>99.9</td>\n",
       "      <td>53.9</td>\n",
       "      <td>5.0</td>\n",
       "      <td>41.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>28</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>f973b01694</td>\n",
       "      <td>4ea3cf408f</td>\n",
       "      <td>623d0a582a</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 09:13:26.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>78.0</td>\n",
       "      <td>58.5</td>\n",
       "      <td>19.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>28</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8c1cec8d4b</td>\n",
       "      <td>b87cb736cb</td>\n",
       "      <td>fc5289b139</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 21:29:50.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>61.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>d43a33c38a</td>\n",
       "      <td>4829223b6f</td>\n",
       "      <td>623d0a582a</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 19:13:37.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>78.0</td>\n",
       "      <td>53.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>549984</th>\n",
       "      <td>3ad06b9fbe</td>\n",
       "      <td>a27b3ed4d4</td>\n",
       "      <td>a9109972d1</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 01:22:47.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>-</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>549985</th>\n",
       "      <td>c9d77a7ed0</td>\n",
       "      <td>18f92434cd</td>\n",
       "      <td>7f53769d3f</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 08:55:57.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>118.0</td>\n",
       "      <td>55.0</td>\n",
       "      <td>63.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>59</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>549986</th>\n",
       "      <td>b9ad79338f</td>\n",
       "      <td>b5caf8a580</td>\n",
       "      <td>8dc4a01dec</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 13:31:01.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>78.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>549987</th>\n",
       "      <td>be3a9414b1</td>\n",
       "      <td>20ba6655f3</td>\n",
       "      <td>2dd6b818ec</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 12:51:18.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>-</td>\n",
       "      <td>189.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>111.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>549988</th>\n",
       "      <td>02d31f05c9</td>\n",
       "      <td>f260895cbe</td>\n",
       "      <td>10d369ef96</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 18:21:16.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>68.0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>549989 rows × 17 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          order_ID     user_ID      sku_ID  order_date             order_time  \\\n",
       "0       d0cf5cc6db  0abe9ef2ce  581d5b54c1  2018-03-01  2018-03-01 17:14:25.0   \n",
       "1       7444318d01  33a9e56257  067b673f2b  2018-03-01  2018-03-01 11:10:40.0   \n",
       "2       f973b01694  4ea3cf408f  623d0a582a  2018-03-01  2018-03-01 09:13:26.0   \n",
       "3       8c1cec8d4b  b87cb736cb  fc5289b139  2018-03-01  2018-03-01 21:29:50.0   \n",
       "4       d43a33c38a  4829223b6f  623d0a582a  2018-03-01  2018-03-01 19:13:37.0   \n",
       "...            ...         ...         ...         ...                    ...   \n",
       "549984  3ad06b9fbe  a27b3ed4d4  a9109972d1  2018-03-31  2018-03-31 01:22:47.0   \n",
       "549985  c9d77a7ed0  18f92434cd  7f53769d3f  2018-03-31  2018-03-31 08:55:57.0   \n",
       "549986  b9ad79338f  b5caf8a580  8dc4a01dec  2018-03-31  2018-03-31 13:31:01.0   \n",
       "549987  be3a9414b1  20ba6655f3  2dd6b818ec  2018-03-31  2018-03-31 12:51:18.0   \n",
       "549988  02d31f05c9  f260895cbe  10d369ef96  2018-03-31  2018-03-31 18:21:16.0   \n",
       "\n",
       "        quantity  type promise  original_unit_price  final_unit_price  \\\n",
       "0              1     2       -                 89.0              79.0   \n",
       "1              1     1       2                 99.9              53.9   \n",
       "2              1     1       2                 78.0              58.5   \n",
       "3              1     1       2                 61.0              35.0   \n",
       "4              1     1       1                 78.0              53.0   \n",
       "...          ...   ...     ...                  ...               ...   \n",
       "549984         1     2       -                  0.0              -1.0   \n",
       "549985         1     1       3                118.0              55.0   \n",
       "549986         1     1       2                 78.0              78.0   \n",
       "549987         1     2       -                189.0              78.0   \n",
       "549988         1     2       4                 68.0              49.0   \n",
       "\n",
       "        direct_discount_per_unit  quantity_discount_per_unit  \\\n",
       "0                            0.0                        10.0   \n",
       "1                            5.0                        41.0   \n",
       "2                           19.5                         0.0   \n",
       "3                            0.0                        26.0   \n",
       "4                           19.0                         0.0   \n",
       "...                          ...                         ...   \n",
       "549984                       0.0                         0.0   \n",
       "549985                      63.0                         0.0   \n",
       "549986                       0.0                         0.0   \n",
       "549987                     111.0                         0.0   \n",
       "549988                      19.0                         0.0   \n",
       "\n",
       "        bundle_discount_per_unit  coupon_discount_per_unit  gift_item  dc_ori  \\\n",
       "0                            0.0                       0.0          0       4   \n",
       "1                            0.0                       0.0          0      28   \n",
       "2                            0.0                       0.0          0      28   \n",
       "3                            0.0                       0.0          0       4   \n",
       "4                            0.0                       6.0          0       3   \n",
       "...                          ...                       ...        ...     ...   \n",
       "549984                       0.0                       1.0          1       2   \n",
       "549985                       0.0                       0.0          0      59   \n",
       "549986                       0.0                       0.0          0       2   \n",
       "549987                       0.0                       0.0          0       4   \n",
       "549988                       0.0                       0.0          0       4   \n",
       "\n",
       "        dc_des  \n",
       "0           28  \n",
       "1           28  \n",
       "2           28  \n",
       "3           28  \n",
       "4           16  \n",
       "...        ...  \n",
       "549984       2  \n",
       "549985       2  \n",
       "549986       2  \n",
       "549987      28  \n",
       "549988      28  \n",
       "\n",
       "[549989 rows x 17 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "orders"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "a1843ec95fc927f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:41:38.556196500Z",
     "start_time": "2024-03-12T12:41:38.469639600Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_ID</th>\n",
       "      <th>user_ID</th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>order_date</th>\n",
       "      <th>order_time</th>\n",
       "      <th>quantity</th>\n",
       "      <th>type</th>\n",
       "      <th>promise</th>\n",
       "      <th>original_unit_price</th>\n",
       "      <th>final_unit_price</th>\n",
       "      <th>direct_discount_per_unit</th>\n",
       "      <th>quantity_discount_per_unit</th>\n",
       "      <th>bundle_discount_per_unit</th>\n",
       "      <th>coupon_discount_per_unit</th>\n",
       "      <th>gift_item</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>d0cf5cc6db</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 17:14:25.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>-</td>\n",
       "      <td>89.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7444318d01</td>\n",
       "      <td>33a9e56257</td>\n",
       "      <td>067b673f2b</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 11:10:40.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>99.9</td>\n",
       "      <td>53.9</td>\n",
       "      <td>5.0</td>\n",
       "      <td>41.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>f973b01694</td>\n",
       "      <td>4ea3cf408f</td>\n",
       "      <td>623d0a582a</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 09:13:26.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>78.0</td>\n",
       "      <td>58.5</td>\n",
       "      <td>19.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8c1cec8d4b</td>\n",
       "      <td>b87cb736cb</td>\n",
       "      <td>fc5289b139</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 21:29:50.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>61.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>d43a33c38a</td>\n",
       "      <td>4829223b6f</td>\n",
       "      <td>623d0a582a</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 19:13:37.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>78.0</td>\n",
       "      <td>53.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>549984</th>\n",
       "      <td>3ad06b9fbe</td>\n",
       "      <td>a27b3ed4d4</td>\n",
       "      <td>a9109972d1</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 01:22:47.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>-</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>549985</th>\n",
       "      <td>c9d77a7ed0</td>\n",
       "      <td>18f92434cd</td>\n",
       "      <td>7f53769d3f</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 08:55:57.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>118.0</td>\n",
       "      <td>55.0</td>\n",
       "      <td>63.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>549986</th>\n",
       "      <td>b9ad79338f</td>\n",
       "      <td>b5caf8a580</td>\n",
       "      <td>8dc4a01dec</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 13:31:01.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>78.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>549987</th>\n",
       "      <td>be3a9414b1</td>\n",
       "      <td>20ba6655f3</td>\n",
       "      <td>2dd6b818ec</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 12:51:18.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>-</td>\n",
       "      <td>189.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>111.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>549988</th>\n",
       "      <td>02d31f05c9</td>\n",
       "      <td>f260895cbe</td>\n",
       "      <td>10d369ef96</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 18:21:16.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>68.0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>549989 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          order_ID     user_ID      sku_ID  order_date             order_time  \\\n",
       "0       d0cf5cc6db  0abe9ef2ce  581d5b54c1  2018-03-01  2018-03-01 17:14:25.0   \n",
       "1       7444318d01  33a9e56257  067b673f2b  2018-03-01  2018-03-01 11:10:40.0   \n",
       "2       f973b01694  4ea3cf408f  623d0a582a  2018-03-01  2018-03-01 09:13:26.0   \n",
       "3       8c1cec8d4b  b87cb736cb  fc5289b139  2018-03-01  2018-03-01 21:29:50.0   \n",
       "4       d43a33c38a  4829223b6f  623d0a582a  2018-03-01  2018-03-01 19:13:37.0   \n",
       "...            ...         ...         ...         ...                    ...   \n",
       "549984  3ad06b9fbe  a27b3ed4d4  a9109972d1  2018-03-31  2018-03-31 01:22:47.0   \n",
       "549985  c9d77a7ed0  18f92434cd  7f53769d3f  2018-03-31  2018-03-31 08:55:57.0   \n",
       "549986  b9ad79338f  b5caf8a580  8dc4a01dec  2018-03-31  2018-03-31 13:31:01.0   \n",
       "549987  be3a9414b1  20ba6655f3  2dd6b818ec  2018-03-31  2018-03-31 12:51:18.0   \n",
       "549988  02d31f05c9  f260895cbe  10d369ef96  2018-03-31  2018-03-31 18:21:16.0   \n",
       "\n",
       "        quantity  type promise  original_unit_price  final_unit_price  \\\n",
       "0              1     2       -                 89.0              79.0   \n",
       "1              1     1       2                 99.9              53.9   \n",
       "2              1     1       2                 78.0              58.5   \n",
       "3              1     1       2                 61.0              35.0   \n",
       "4              1     1       1                 78.0              53.0   \n",
       "...          ...   ...     ...                  ...               ...   \n",
       "549984         1     2       -                  0.0              -1.0   \n",
       "549985         1     1       3                118.0              55.0   \n",
       "549986         1     1       2                 78.0              78.0   \n",
       "549987         1     2       -                189.0              78.0   \n",
       "549988         1     2       4                 68.0              49.0   \n",
       "\n",
       "        direct_discount_per_unit  quantity_discount_per_unit  \\\n",
       "0                            0.0                        10.0   \n",
       "1                            5.0                        41.0   \n",
       "2                           19.5                         0.0   \n",
       "3                            0.0                        26.0   \n",
       "4                           19.0                         0.0   \n",
       "...                          ...                         ...   \n",
       "549984                       0.0                         0.0   \n",
       "549985                      63.0                         0.0   \n",
       "549986                       0.0                         0.0   \n",
       "549987                     111.0                         0.0   \n",
       "549988                      19.0                         0.0   \n",
       "\n",
       "        bundle_discount_per_unit  coupon_discount_per_unit  gift_item  \n",
       "0                            0.0                       0.0          0  \n",
       "1                            0.0                       0.0          0  \n",
       "2                            0.0                       0.0          0  \n",
       "3                            0.0                       0.0          0  \n",
       "4                            0.0                       6.0          0  \n",
       "...                          ...                       ...        ...  \n",
       "549984                       0.0                       1.0          1  \n",
       "549985                       0.0                       0.0          0  \n",
       "549986                       0.0                       0.0          0  \n",
       "549987                       0.0                       0.0          0  \n",
       "549988                       0.0                       0.0          0  \n",
       "\n",
       "[549989 rows x 15 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only the first 15 columns. The explicit .copy() makes `orders` an\n",
    "# independent frame, so later column assignments never write into a slice\n",
    "# of the originally loaded table.\n",
    "orders = orders.iloc[:, :15].copy()\n",
    "orders"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "6c419258dc5268dd",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:41:38.639742800Z",
     "start_time": "2024-03-12T12:41:38.515157500Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_ID</th>\n",
       "      <th>user_ID</th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>order_date</th>\n",
       "      <th>order_time</th>\n",
       "      <th>quantity</th>\n",
       "      <th>type</th>\n",
       "      <th>promise</th>\n",
       "      <th>original_unit_price</th>\n",
       "      <th>final_unit_price</th>\n",
       "      <th>direct_discount_per_unit</th>\n",
       "      <th>quantity_discount_per_unit</th>\n",
       "      <th>bundle_discount_per_unit</th>\n",
       "      <th>coupon_discount_per_unit</th>\n",
       "      <th>gift_item</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>d0cf5cc6db</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 17:14:25.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>89.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7444318d01</td>\n",
       "      <td>33a9e56257</td>\n",
       "      <td>067b673f2b</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 11:10:40.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>99.9</td>\n",
       "      <td>53.9</td>\n",
       "      <td>5.0</td>\n",
       "      <td>41.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>f973b01694</td>\n",
       "      <td>4ea3cf408f</td>\n",
       "      <td>623d0a582a</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 09:13:26.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>78.0</td>\n",
       "      <td>58.5</td>\n",
       "      <td>19.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8c1cec8d4b</td>\n",
       "      <td>b87cb736cb</td>\n",
       "      <td>fc5289b139</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 21:29:50.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>61.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>d43a33c38a</td>\n",
       "      <td>4829223b6f</td>\n",
       "      <td>623d0a582a</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 19:13:37.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>78.0</td>\n",
       "      <td>53.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>549984</th>\n",
       "      <td>3ad06b9fbe</td>\n",
       "      <td>a27b3ed4d4</td>\n",
       "      <td>a9109972d1</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 01:22:47.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>549985</th>\n",
       "      <td>c9d77a7ed0</td>\n",
       "      <td>18f92434cd</td>\n",
       "      <td>7f53769d3f</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 08:55:57.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>118.0</td>\n",
       "      <td>55.0</td>\n",
       "      <td>63.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>549986</th>\n",
       "      <td>b9ad79338f</td>\n",
       "      <td>b5caf8a580</td>\n",
       "      <td>8dc4a01dec</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 13:31:01.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>78.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>549987</th>\n",
       "      <td>be3a9414b1</td>\n",
       "      <td>20ba6655f3</td>\n",
       "      <td>2dd6b818ec</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 12:51:18.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>189.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>111.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>549988</th>\n",
       "      <td>02d31f05c9</td>\n",
       "      <td>f260895cbe</td>\n",
       "      <td>10d369ef96</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 18:21:16.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>68.0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>549989 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          order_ID     user_ID      sku_ID  order_date             order_time  \\\n",
       "0       d0cf5cc6db  0abe9ef2ce  581d5b54c1  2018-03-01  2018-03-01 17:14:25.0   \n",
       "1       7444318d01  33a9e56257  067b673f2b  2018-03-01  2018-03-01 11:10:40.0   \n",
       "2       f973b01694  4ea3cf408f  623d0a582a  2018-03-01  2018-03-01 09:13:26.0   \n",
       "3       8c1cec8d4b  b87cb736cb  fc5289b139  2018-03-01  2018-03-01 21:29:50.0   \n",
       "4       d43a33c38a  4829223b6f  623d0a582a  2018-03-01  2018-03-01 19:13:37.0   \n",
       "...            ...         ...         ...         ...                    ...   \n",
       "549984  3ad06b9fbe  a27b3ed4d4  a9109972d1  2018-03-31  2018-03-31 01:22:47.0   \n",
       "549985  c9d77a7ed0  18f92434cd  7f53769d3f  2018-03-31  2018-03-31 08:55:57.0   \n",
       "549986  b9ad79338f  b5caf8a580  8dc4a01dec  2018-03-31  2018-03-31 13:31:01.0   \n",
       "549987  be3a9414b1  20ba6655f3  2dd6b818ec  2018-03-31  2018-03-31 12:51:18.0   \n",
       "549988  02d31f05c9  f260895cbe  10d369ef96  2018-03-31  2018-03-31 18:21:16.0   \n",
       "\n",
       "        quantity  type promise  original_unit_price  final_unit_price  \\\n",
       "0              1     2     NaN                 89.0              79.0   \n",
       "1              1     1       2                 99.9              53.9   \n",
       "2              1     1       2                 78.0              58.5   \n",
       "3              1     1       2                 61.0              35.0   \n",
       "4              1     1       1                 78.0              53.0   \n",
       "...          ...   ...     ...                  ...               ...   \n",
       "549984         1     2     NaN                  0.0              -1.0   \n",
       "549985         1     1       3                118.0              55.0   \n",
       "549986         1     1       2                 78.0              78.0   \n",
       "549987         1     2     NaN                189.0              78.0   \n",
       "549988         1     2       4                 68.0              49.0   \n",
       "\n",
       "        direct_discount_per_unit  quantity_discount_per_unit  \\\n",
       "0                            0.0                        10.0   \n",
       "1                            5.0                        41.0   \n",
       "2                           19.5                         0.0   \n",
       "3                            0.0                        26.0   \n",
       "4                           19.0                         0.0   \n",
       "...                          ...                         ...   \n",
       "549984                       0.0                         0.0   \n",
       "549985                      63.0                         0.0   \n",
       "549986                       0.0                         0.0   \n",
       "549987                     111.0                         0.0   \n",
       "549988                      19.0                         0.0   \n",
       "\n",
       "        bundle_discount_per_unit  coupon_discount_per_unit  gift_item  \n",
       "0                            0.0                       0.0          0  \n",
       "1                            0.0                       0.0          0  \n",
       "2                            0.0                       0.0          0  \n",
       "3                            0.0                       0.0          0  \n",
       "4                            0.0                       6.0          0  \n",
       "...                          ...                       ...        ...  \n",
       "549984                       0.0                       1.0          1  \n",
       "549985                       0.0                       0.0          0  \n",
       "549986                       0.0                       0.0          0  \n",
       "549987                       0.0                       0.0          0  \n",
       "549988                       0.0                       0.0          0  \n",
       "\n",
       "[549989 rows x 15 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Treat the '-' placeholder as missing data. DataFrame.replace scans every\n",
    "# column in one call and returns a new frame, so no per-column loop or\n",
    "# in-place column assignment is needed.\n",
    "orders = orders.replace('-', np.nan)\n",
    "orders"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "34e7c2c49bac3b7c",
   "metadata": {},
   "source": [
    "## users"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "2aa92d3791d7740e",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:41:38.871504900Z",
     "start_time": "2024-03-12T12:41:38.639742800Z"
    }
   },
   "outputs": [],
   "source": [
    "# 'users' table: user profile information\n",
    "users = pd.read_csv('./JD_data/JD_user_data.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "e64181c8087fccaf",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:41:38.885602Z",
     "start_time": "2024-03-12T12:41:38.871504900Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_ID</th>\n",
       "      <th>user_level</th>\n",
       "      <th>first_order_month</th>\n",
       "      <th>plus</th>\n",
       "      <th>gender</th>\n",
       "      <th>age</th>\n",
       "      <th>marital_status</th>\n",
       "      <th>education</th>\n",
       "      <th>city_level</th>\n",
       "      <th>purchase_power</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>000089d6a6</td>\n",
       "      <td>1</td>\n",
       "      <td>2017-08</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>26-35</td>\n",
       "      <td>S</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0000babd1f</td>\n",
       "      <td>1</td>\n",
       "      <td>2018-03</td>\n",
       "      <td>0</td>\n",
       "      <td>U</td>\n",
       "      <td>U</td>\n",
       "      <td>U</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0000bc018b</td>\n",
       "      <td>3</td>\n",
       "      <td>2016-06</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>&gt;=56</td>\n",
       "      <td>M</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0000d0e5ab</td>\n",
       "      <td>3</td>\n",
       "      <td>2014-06</td>\n",
       "      <td>0</td>\n",
       "      <td>M</td>\n",
       "      <td>26-35</td>\n",
       "      <td>M</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0000dce472</td>\n",
       "      <td>3</td>\n",
       "      <td>2012-08</td>\n",
       "      <td>1</td>\n",
       "      <td>U</td>\n",
       "      <td>U</td>\n",
       "      <td>U</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>457293</th>\n",
       "      <td>ffff38690b</td>\n",
       "      <td>1</td>\n",
       "      <td>2018-03</td>\n",
       "      <td>0</td>\n",
       "      <td>U</td>\n",
       "      <td>U</td>\n",
       "      <td>U</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>457294</th>\n",
       "      <td>ffffa1a495</td>\n",
       "      <td>4</td>\n",
       "      <td>2011-09</td>\n",
       "      <td>1</td>\n",
       "      <td>M</td>\n",
       "      <td>26-35</td>\n",
       "      <td>S</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>457295</th>\n",
       "      <td>ffffb20ef7</td>\n",
       "      <td>3</td>\n",
       "      <td>2017-11</td>\n",
       "      <td>0</td>\n",
       "      <td>M</td>\n",
       "      <td>36-45</td>\n",
       "      <td>M</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>457296</th>\n",
       "      <td>ffffc45330</td>\n",
       "      <td>1</td>\n",
       "      <td>2016-04</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>26-35</td>\n",
       "      <td>M</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>457297</th>\n",
       "      <td>ffffe74cfb</td>\n",
       "      <td>1</td>\n",
       "      <td>2017-10</td>\n",
       "      <td>0</td>\n",
       "      <td>M</td>\n",
       "      <td>26-35</td>\n",
       "      <td>M</td>\n",
       "      <td>-1</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>457298 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           user_ID  user_level first_order_month  plus gender    age  \\\n",
       "0       000089d6a6           1           2017-08     0      F  26-35   \n",
       "1       0000babd1f           1           2018-03     0      U      U   \n",
       "2       0000bc018b           3           2016-06     0      F   >=56   \n",
       "3       0000d0e5ab           3           2014-06     0      M  26-35   \n",
       "4       0000dce472           3           2012-08     1      U      U   \n",
       "...            ...         ...               ...   ...    ...    ...   \n",
       "457293  ffff38690b           1           2018-03     0      U      U   \n",
       "457294  ffffa1a495           4           2011-09     1      M  26-35   \n",
       "457295  ffffb20ef7           3           2017-11     0      M  36-45   \n",
       "457296  ffffc45330           1           2016-04     0      F  26-35   \n",
       "457297  ffffe74cfb           1           2017-10     0      M  26-35   \n",
       "\n",
       "       marital_status  education  city_level  purchase_power  \n",
       "0                   S          3           4               3  \n",
       "1                   U         -1          -1              -1  \n",
       "2                   M          3           2               3  \n",
       "3                   M          3           2               2  \n",
       "4                   U         -1          -1              -1  \n",
       "...               ...        ...         ...             ...  \n",
       "457293              U         -1          -1              -1  \n",
       "457294              S          3           1               2  \n",
       "457295              M          2           4               2  \n",
       "457296              M         -1          -1              -1  \n",
       "457297              M         -1           3               3  \n",
       "\n",
       "[457298 rows x 10 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the users table as this cell's output.\n",
    "users"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a0aee8aab83ec97c",
   "metadata": {},
   "source": [
    "---\n",
    "# 合并数据"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "20f080d972e9e7dc",
   "metadata": {},
   "source": [
    "## 内连--下单的点击"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "636f22433e2151d4",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:41:45.651876100Z",
     "start_time": "2024-03-12T12:41:38.887597800Z"
    }
   },
   "outputs": [],
   "source": [
    "# Merge the clicks and orders tables with an inner join on (sku_ID, user_ID).\n",
    "# NOTE(review): the join keys are not unique per row, so a click is paired with\n",
    "# every order sharing its user/SKU. The rows displayed in the next cell include\n",
    "# a click whose request_time is later than the matched order_time -- confirm\n",
    "# this is the intended definition of \"clicks that led to an order\".\n",
    "clor_in = pd.merge(clicks, orders, on=['sku_ID', 'user_ID'], how='inner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "30ae56464ccbc716",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:41:45.671655Z",
     "start_time": "2024-03-12T12:41:45.652884700Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>user_ID</th>\n",
       "      <th>request_time</th>\n",
       "      <th>channel</th>\n",
       "      <th>order_ID</th>\n",
       "      <th>order_date</th>\n",
       "      <th>order_time</th>\n",
       "      <th>quantity</th>\n",
       "      <th>type</th>\n",
       "      <th>promise</th>\n",
       "      <th>original_unit_price</th>\n",
       "      <th>final_unit_price</th>\n",
       "      <th>direct_discount_per_unit</th>\n",
       "      <th>quantity_discount_per_unit</th>\n",
       "      <th>bundle_discount_per_unit</th>\n",
       "      <th>coupon_discount_per_unit</th>\n",
       "      <th>gift_item</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>2791ec4485</td>\n",
       "      <td>2018-03-01 22:10:51</td>\n",
       "      <td>wechat</td>\n",
       "      <td>e4874e2a00</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 14:08:33.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>88.0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>39.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>2791ec4485</td>\n",
       "      <td>2018-03-01 13:50:40</td>\n",
       "      <td>wechat</td>\n",
       "      <td>e4874e2a00</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 14:08:33.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>88.0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>39.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>2791ec4485</td>\n",
       "      <td>2018-03-01 13:53:56</td>\n",
       "      <td>wechat</td>\n",
       "      <td>e4874e2a00</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 14:08:33.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>88.0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>39.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>2791ec4485</td>\n",
       "      <td>2018-03-01 13:51:59</td>\n",
       "      <td>wechat</td>\n",
       "      <td>e4874e2a00</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 14:08:33.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>88.0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>39.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>2791ec4485</td>\n",
       "      <td>2018-03-01 13:53:24</td>\n",
       "      <td>wechat</td>\n",
       "      <td>e4874e2a00</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 14:08:33.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>88.0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>39.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2635497</th>\n",
       "      <td>afd795015b</td>\n",
       "      <td>1f77ae7686</td>\n",
       "      <td>2018-03-31 10:43:03</td>\n",
       "      <td>pc</td>\n",
       "      <td>45aed6f6b3</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 11:08:42.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>179.0</td>\n",
       "      <td>139.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2635498</th>\n",
       "      <td>afd795015b</td>\n",
       "      <td>1f77ae7686</td>\n",
       "      <td>2018-03-31 11:12:16</td>\n",
       "      <td>pc</td>\n",
       "      <td>45aed6f6b3</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 11:08:42.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>179.0</td>\n",
       "      <td>139.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2635499</th>\n",
       "      <td>afd795015b</td>\n",
       "      <td>1f77ae7686</td>\n",
       "      <td>2018-03-31 10:41:59</td>\n",
       "      <td>pc</td>\n",
       "      <td>45aed6f6b3</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 11:08:42.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>179.0</td>\n",
       "      <td>139.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2635500</th>\n",
       "      <td>afd795015b</td>\n",
       "      <td>1f77ae7686</td>\n",
       "      <td>2018-03-31 11:11:30</td>\n",
       "      <td>pc</td>\n",
       "      <td>45aed6f6b3</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 11:08:42.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>179.0</td>\n",
       "      <td>139.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2635501</th>\n",
       "      <td>fa823767ca</td>\n",
       "      <td>20397c54a9</td>\n",
       "      <td>2018-03-31 18:02:00</td>\n",
       "      <td>pc</td>\n",
       "      <td>509213d72a</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 18:02:46.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>79.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2635502 rows × 17 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             sku_ID     user_ID         request_time channel    order_ID  \\\n",
       "0        09b70fcd83  2791ec4485  2018-03-01 22:10:51  wechat  e4874e2a00   \n",
       "1        09b70fcd83  2791ec4485  2018-03-01 13:50:40  wechat  e4874e2a00   \n",
       "2        09b70fcd83  2791ec4485  2018-03-01 13:53:56  wechat  e4874e2a00   \n",
       "3        09b70fcd83  2791ec4485  2018-03-01 13:51:59  wechat  e4874e2a00   \n",
       "4        09b70fcd83  2791ec4485  2018-03-01 13:53:24  wechat  e4874e2a00   \n",
       "...             ...         ...                  ...     ...         ...   \n",
       "2635497  afd795015b  1f77ae7686  2018-03-31 10:43:03      pc  45aed6f6b3   \n",
       "2635498  afd795015b  1f77ae7686  2018-03-31 11:12:16      pc  45aed6f6b3   \n",
       "2635499  afd795015b  1f77ae7686  2018-03-31 10:41:59      pc  45aed6f6b3   \n",
       "2635500  afd795015b  1f77ae7686  2018-03-31 11:11:30      pc  45aed6f6b3   \n",
       "2635501  fa823767ca  20397c54a9  2018-03-31 18:02:00      pc  509213d72a   \n",
       "\n",
       "         order_date             order_time  quantity  type promise  \\\n",
       "0        2018-03-01  2018-03-01 14:08:33.0         1     2     NaN   \n",
       "1        2018-03-01  2018-03-01 14:08:33.0         1     2     NaN   \n",
       "2        2018-03-01  2018-03-01 14:08:33.0         1     2     NaN   \n",
       "3        2018-03-01  2018-03-01 14:08:33.0         1     2     NaN   \n",
       "4        2018-03-01  2018-03-01 14:08:33.0         1     2     NaN   \n",
       "...             ...                    ...       ...   ...     ...   \n",
       "2635497  2018-03-31  2018-03-31 11:08:42.0         1     1       2   \n",
       "2635498  2018-03-31  2018-03-31 11:08:42.0         1     1       2   \n",
       "2635499  2018-03-31  2018-03-31 11:08:42.0         1     1       2   \n",
       "2635500  2018-03-31  2018-03-31 11:08:42.0         1     1       2   \n",
       "2635501  2018-03-31  2018-03-31 18:02:46.0         1     1       2   \n",
       "\n",
       "         original_unit_price  final_unit_price  direct_discount_per_unit  \\\n",
       "0                       88.0              49.0                      39.0   \n",
       "1                       88.0              49.0                      39.0   \n",
       "2                       88.0              49.0                      39.0   \n",
       "3                       88.0              49.0                      39.0   \n",
       "4                       88.0              49.0                      39.0   \n",
       "...                      ...               ...                       ...   \n",
       "2635497                179.0             139.0                      40.0   \n",
       "2635498                179.0             139.0                      40.0   \n",
       "2635499                179.0             139.0                      40.0   \n",
       "2635500                179.0             139.0                      40.0   \n",
       "2635501                 79.0              73.0                       0.0   \n",
       "\n",
       "         quantity_discount_per_unit  bundle_discount_per_unit  \\\n",
       "0                               0.0                       0.0   \n",
       "1                               0.0                       0.0   \n",
       "2                               0.0                       0.0   \n",
       "3                               0.0                       0.0   \n",
       "4                               0.0                       0.0   \n",
       "...                             ...                       ...   \n",
       "2635497                         0.0                       0.0   \n",
       "2635498                         0.0                       0.0   \n",
       "2635499                         0.0                       0.0   \n",
       "2635500                         0.0                       0.0   \n",
       "2635501                         0.0                       0.0   \n",
       "\n",
       "         coupon_discount_per_unit  gift_item  \n",
       "0                             0.0          0  \n",
       "1                             0.0          0  \n",
       "2                             0.0          0  \n",
       "3                             0.0          0  \n",
       "4                             0.0          0  \n",
       "...                           ...        ...  \n",
       "2635497                       0.0          0  \n",
       "2635498                       0.0          0  \n",
       "2635499                       0.0          0  \n",
       "2635500                       0.0          0  \n",
       "2635501                       6.0          0  \n",
       "\n",
       "[2635502 rows x 17 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clor_in"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "edbb0fa3ed4dffc",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T13:09:44.180826800Z",
     "start_time": "2024-03-12T13:09:42.336649700Z"
    }
   },
   "outputs": [],
   "source": [
    "clor_us_in = pd.merge(clor_in, users, on=['user_ID'], how='left')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "b6e2d46f00c5768c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T13:09:53.868378200Z",
     "start_time": "2024-03-12T13:09:52.758094400Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>user_ID</th>\n",
       "      <th>request_time</th>\n",
       "      <th>channel</th>\n",
       "      <th>order_ID</th>\n",
       "      <th>order_date</th>\n",
       "      <th>order_time</th>\n",
       "      <th>quantity</th>\n",
       "      <th>type</th>\n",
       "      <th>promise</th>\n",
       "      <th>...</th>\n",
       "      <th>gift_item</th>\n",
       "      <th>user_level</th>\n",
       "      <th>first_order_month</th>\n",
       "      <th>plus</th>\n",
       "      <th>gender</th>\n",
       "      <th>age</th>\n",
       "      <th>marital_status</th>\n",
       "      <th>education</th>\n",
       "      <th>city_level</th>\n",
       "      <th>purchase_power</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>2791ec4485</td>\n",
       "      <td>2018-03-01 22:10:51</td>\n",
       "      <td>wechat</td>\n",
       "      <td>e4874e2a00</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 14:08:33.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2018-02</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>26-35</td>\n",
       "      <td>M</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>2791ec4485</td>\n",
       "      <td>2018-03-01 13:50:40</td>\n",
       "      <td>wechat</td>\n",
       "      <td>e4874e2a00</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 14:08:33.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2018-02</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>26-35</td>\n",
       "      <td>M</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>2791ec4485</td>\n",
       "      <td>2018-03-01 13:53:56</td>\n",
       "      <td>wechat</td>\n",
       "      <td>e4874e2a00</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 14:08:33.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2018-02</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>26-35</td>\n",
       "      <td>M</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>2791ec4485</td>\n",
       "      <td>2018-03-01 13:51:59</td>\n",
       "      <td>wechat</td>\n",
       "      <td>e4874e2a00</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 14:08:33.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2018-02</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>26-35</td>\n",
       "      <td>M</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>2791ec4485</td>\n",
       "      <td>2018-03-01 13:53:24</td>\n",
       "      <td>wechat</td>\n",
       "      <td>e4874e2a00</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 14:08:33.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2018-02</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>26-35</td>\n",
       "      <td>M</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2635497</th>\n",
       "      <td>afd795015b</td>\n",
       "      <td>1f77ae7686</td>\n",
       "      <td>2018-03-31 10:43:03</td>\n",
       "      <td>pc</td>\n",
       "      <td>45aed6f6b3</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 11:08:42.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>2015-08</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>36-45</td>\n",
       "      <td>M</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2635498</th>\n",
       "      <td>afd795015b</td>\n",
       "      <td>1f77ae7686</td>\n",
       "      <td>2018-03-31 11:12:16</td>\n",
       "      <td>pc</td>\n",
       "      <td>45aed6f6b3</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 11:08:42.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>2015-08</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>36-45</td>\n",
       "      <td>M</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2635499</th>\n",
       "      <td>afd795015b</td>\n",
       "      <td>1f77ae7686</td>\n",
       "      <td>2018-03-31 10:41:59</td>\n",
       "      <td>pc</td>\n",
       "      <td>45aed6f6b3</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 11:08:42.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>2015-08</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>36-45</td>\n",
       "      <td>M</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2635500</th>\n",
       "      <td>afd795015b</td>\n",
       "      <td>1f77ae7686</td>\n",
       "      <td>2018-03-31 11:11:30</td>\n",
       "      <td>pc</td>\n",
       "      <td>45aed6f6b3</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 11:08:42.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>2015-08</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>36-45</td>\n",
       "      <td>M</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2635501</th>\n",
       "      <td>fa823767ca</td>\n",
       "      <td>20397c54a9</td>\n",
       "      <td>2018-03-31 18:02:00</td>\n",
       "      <td>pc</td>\n",
       "      <td>509213d72a</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 18:02:46.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>2010-02</td>\n",
       "      <td>1</td>\n",
       "      <td>M</td>\n",
       "      <td>26-35</td>\n",
       "      <td>M</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2635502 rows × 26 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             sku_ID     user_ID         request_time channel    order_ID  \\\n",
       "0        09b70fcd83  2791ec4485  2018-03-01 22:10:51  wechat  e4874e2a00   \n",
       "1        09b70fcd83  2791ec4485  2018-03-01 13:50:40  wechat  e4874e2a00   \n",
       "2        09b70fcd83  2791ec4485  2018-03-01 13:53:56  wechat  e4874e2a00   \n",
       "3        09b70fcd83  2791ec4485  2018-03-01 13:51:59  wechat  e4874e2a00   \n",
       "4        09b70fcd83  2791ec4485  2018-03-01 13:53:24  wechat  e4874e2a00   \n",
       "...             ...         ...                  ...     ...         ...   \n",
       "2635497  afd795015b  1f77ae7686  2018-03-31 10:43:03      pc  45aed6f6b3   \n",
       "2635498  afd795015b  1f77ae7686  2018-03-31 11:12:16      pc  45aed6f6b3   \n",
       "2635499  afd795015b  1f77ae7686  2018-03-31 10:41:59      pc  45aed6f6b3   \n",
       "2635500  afd795015b  1f77ae7686  2018-03-31 11:11:30      pc  45aed6f6b3   \n",
       "2635501  fa823767ca  20397c54a9  2018-03-31 18:02:00      pc  509213d72a   \n",
       "\n",
       "         order_date             order_time  quantity  type promise  ...  \\\n",
       "0        2018-03-01  2018-03-01 14:08:33.0         1     2     NaN  ...   \n",
       "1        2018-03-01  2018-03-01 14:08:33.0         1     2     NaN  ...   \n",
       "2        2018-03-01  2018-03-01 14:08:33.0         1     2     NaN  ...   \n",
       "3        2018-03-01  2018-03-01 14:08:33.0         1     2     NaN  ...   \n",
       "4        2018-03-01  2018-03-01 14:08:33.0         1     2     NaN  ...   \n",
       "...             ...                    ...       ...   ...     ...  ...   \n",
       "2635497  2018-03-31  2018-03-31 11:08:42.0         1     1       2  ...   \n",
       "2635498  2018-03-31  2018-03-31 11:08:42.0         1     1       2  ...   \n",
       "2635499  2018-03-31  2018-03-31 11:08:42.0         1     1       2  ...   \n",
       "2635500  2018-03-31  2018-03-31 11:08:42.0         1     1       2  ...   \n",
       "2635501  2018-03-31  2018-03-31 18:02:46.0         1     1       2  ...   \n",
       "\n",
       "         gift_item  user_level  first_order_month  plus  gender    age  \\\n",
       "0                0           1            2018-02     0       F  26-35   \n",
       "1                0           1            2018-02     0       F  26-35   \n",
       "2                0           1            2018-02     0       F  26-35   \n",
       "3                0           1            2018-02     0       F  26-35   \n",
       "4                0           1            2018-02     0       F  26-35   \n",
       "...            ...         ...                ...   ...     ...    ...   \n",
       "2635497          0           3            2015-08     0       F  36-45   \n",
       "2635498          0           3            2015-08     0       F  36-45   \n",
       "2635499          0           3            2015-08     0       F  36-45   \n",
       "2635500          0           3            2015-08     0       F  36-45   \n",
       "2635501          0           4            2010-02     1       M  26-35   \n",
       "\n",
       "         marital_status  education city_level  purchase_power  \n",
       "0                     M          2          3               3  \n",
       "1                     M          2          3               3  \n",
       "2                     M          2          3               3  \n",
       "3                     M          2          3               3  \n",
       "4                     M          2          3               3  \n",
       "...                 ...        ...        ...             ...  \n",
       "2635497               M          3          3               2  \n",
       "2635498               M          3          3               2  \n",
       "2635499               M          3          3               2  \n",
       "2635500               M          3          3               2  \n",
       "2635501               M          3          1               2  \n",
       "\n",
       "[2635502 rows x 26 columns]"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clor_us_in"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c452570fa33bc391",
   "metadata": {},
   "source": [
    "### user_ID"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "9884518651752ad0",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:41:47.898665900Z",
     "start_time": "2024-03-12T12:41:47.849663800Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_ID</th>\n",
       "      <th>user_ID</th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>order_date</th>\n",
       "      <th>order_time</th>\n",
       "      <th>quantity</th>\n",
       "      <th>type</th>\n",
       "      <th>promise</th>\n",
       "      <th>original_unit_price</th>\n",
       "      <th>final_unit_price</th>\n",
       "      <th>direct_discount_per_unit</th>\n",
       "      <th>quantity_discount_per_unit</th>\n",
       "      <th>bundle_discount_per_unit</th>\n",
       "      <th>coupon_discount_per_unit</th>\n",
       "      <th>gift_item</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>d0cf5cc6db</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 17:14:25.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>89.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>85855</th>\n",
       "      <td>9d74489696</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>38d636d2a6</td>\n",
       "      <td>2018-03-04</td>\n",
       "      <td>2018-03-04 21:52:33.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>129.0</td>\n",
       "      <td>108.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>85857</th>\n",
       "      <td>9d74489696</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>6717b7c979</td>\n",
       "      <td>2018-03-04</td>\n",
       "      <td>2018-03-04 21:52:33.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         order_ID     user_ID      sku_ID  order_date             order_time  \\\n",
       "0      d0cf5cc6db  0abe9ef2ce  581d5b54c1  2018-03-01  2018-03-01 17:14:25.0   \n",
       "85855  9d74489696  0abe9ef2ce  38d636d2a6  2018-03-04  2018-03-04 21:52:33.0   \n",
       "85857  9d74489696  0abe9ef2ce  6717b7c979  2018-03-04  2018-03-04 21:52:33.0   \n",
       "\n",
       "       quantity  type promise  original_unit_price  final_unit_price  \\\n",
       "0             1     2     NaN                 89.0              79.0   \n",
       "85855         1     1       2                129.0             108.0   \n",
       "85857         1     1       2                  0.0               0.0   \n",
       "\n",
       "       direct_discount_per_unit  quantity_discount_per_unit  \\\n",
       "0                           0.0                        10.0   \n",
       "85855                      21.0                         0.0   \n",
       "85857                       0.0                         0.0   \n",
       "\n",
       "       bundle_discount_per_unit  coupon_discount_per_unit  gift_item  \n",
       "0                           0.0                       0.0          0  \n",
       "85855                       0.0                       0.0          0  \n",
       "85857                       0.0                       0.0          1  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "orders[orders['user_ID'] == '0abe9ef2ce']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "28adbab5745333c8",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:41:48.597050400Z",
     "start_time": "2024-03-12T12:41:47.879093500Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>user_ID</th>\n",
       "      <th>request_time</th>\n",
       "      <th>channel</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>69489</th>\n",
       "      <td>97a6e047ca</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>2018-03-01 16:09:43</td>\n",
       "      <td>mobile</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75660</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>2018-03-01 17:09:41</td>\n",
       "      <td>mobile</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75703</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>2018-03-01 17:07:02</td>\n",
       "      <td>mobile</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>216500</th>\n",
       "      <td>01c9d91829</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>2018-03-01 16:13:49</td>\n",
       "      <td>mobile</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>252621</th>\n",
       "      <td>9f7388280e</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>2018-03-01 16:09:06</td>\n",
       "      <td>mobile</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>258584</th>\n",
       "      <td>d0f488ea78</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>2018-03-01 17:03:53</td>\n",
       "      <td>mobile</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>258608</th>\n",
       "      <td>d0f488ea78</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>2018-03-01 17:04:34</td>\n",
       "      <td>mobile</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>354796</th>\n",
       "      <td>989aa91456</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>2018-03-01 16:12:26</td>\n",
       "      <td>mobile</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2376631</th>\n",
       "      <td>6717b7c979</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>2018-03-04 21:55:55</td>\n",
       "      <td>mobile</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             sku_ID     user_ID         request_time channel\n",
       "69489    97a6e047ca  0abe9ef2ce  2018-03-01 16:09:43  mobile\n",
       "75660    581d5b54c1  0abe9ef2ce  2018-03-01 17:09:41  mobile\n",
       "75703    581d5b54c1  0abe9ef2ce  2018-03-01 17:07:02  mobile\n",
       "216500   01c9d91829  0abe9ef2ce  2018-03-01 16:13:49  mobile\n",
       "252621   9f7388280e  0abe9ef2ce  2018-03-01 16:09:06  mobile\n",
       "258584   d0f488ea78  0abe9ef2ce  2018-03-01 17:03:53  mobile\n",
       "258608   d0f488ea78  0abe9ef2ce  2018-03-01 17:04:34  mobile\n",
       "354796   989aa91456  0abe9ef2ce  2018-03-01 16:12:26  mobile\n",
       "2376631  6717b7c979  0abe9ef2ce  2018-03-04 21:55:55  mobile"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clicks[clicks['user_ID'] == '0abe9ef2ce']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9f8b6889ebdd87de",
   "metadata": {},
   "source": [
    "一个user 有多次点击"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "5d5f124683595bf1",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:41:49.224377Z",
     "start_time": "2024-03-12T12:41:48.597050400Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>user_ID</th>\n",
       "      <th>request_time</th>\n",
       "      <th>channel</th>\n",
       "      <th>order_ID</th>\n",
       "      <th>order_date</th>\n",
       "      <th>order_time</th>\n",
       "      <th>quantity</th>\n",
       "      <th>type</th>\n",
       "      <th>promise</th>\n",
       "      <th>...</th>\n",
       "      <th>gift_item</th>\n",
       "      <th>user_level</th>\n",
       "      <th>first_order_month</th>\n",
       "      <th>plus</th>\n",
       "      <th>gender</th>\n",
       "      <th>age</th>\n",
       "      <th>marital_status</th>\n",
       "      <th>education</th>\n",
       "      <th>city_level</th>\n",
       "      <th>purchase_power</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>13647</th>\n",
       "      <td>924abbb0e3</td>\n",
       "      <td>3f8a38fb7c</td>\n",
       "      <td>2018-03-01 12:34:06</td>\n",
       "      <td>wechat</td>\n",
       "      <td>f4966262e3</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 12:33:31.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2016-11</td>\n",
       "      <td>0</td>\n",
       "      <td>U</td>\n",
       "      <td>U</td>\n",
       "      <td>U</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13648</th>\n",
       "      <td>924abbb0e3</td>\n",
       "      <td>3f8a38fb7c</td>\n",
       "      <td>2018-03-01 12:17:00</td>\n",
       "      <td>wechat</td>\n",
       "      <td>f4966262e3</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 12:33:31.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2016-11</td>\n",
       "      <td>0</td>\n",
       "      <td>U</td>\n",
       "      <td>U</td>\n",
       "      <td>U</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>471341</th>\n",
       "      <td>c4ea4afceb</td>\n",
       "      <td>cd7832e847</td>\n",
       "      <td>2018-03-11 12:11:13</td>\n",
       "      <td>app</td>\n",
       "      <td>8ad9dbee32</td>\n",
       "      <td>2018-03-11</td>\n",
       "      <td>2018-03-11 12:11:47.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>2015-03</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>26-35</td>\n",
       "      <td>M</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3 rows × 26 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            sku_ID     user_ID         request_time channel    order_ID  \\\n",
       "13647   924abbb0e3  3f8a38fb7c  2018-03-01 12:34:06  wechat  f4966262e3   \n",
       "13648   924abbb0e3  3f8a38fb7c  2018-03-01 12:17:00  wechat  f4966262e3   \n",
       "471341  c4ea4afceb  cd7832e847  2018-03-11 12:11:13     app  8ad9dbee32   \n",
       "\n",
       "        order_date             order_time  quantity  type promise  ...  \\\n",
       "13647   2018-03-01  2018-03-01 12:33:31.0         1     2       3  ...   \n",
       "13648   2018-03-01  2018-03-01 12:33:31.0         1     2       3  ...   \n",
       "471341  2018-03-11  2018-03-11 12:11:47.0         2     1       1  ...   \n",
       "\n",
       "        gift_item  user_level  first_order_month  plus  gender    age  \\\n",
       "13647           0           1            2016-11     0       U      U   \n",
       "13648           0           1            2016-11     0       U      U   \n",
       "471341          0           4            2015-03     0       F  26-35   \n",
       "\n",
       "        marital_status  education city_level  purchase_power  \n",
       "13647                U         -1         -1              -1  \n",
       "13648                U         -1         -1              -1  \n",
       "471341               M          3          4               2  \n",
       "\n",
       "[3 rows x 26 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clor_us_in[clor_in['user_ID'] == '0abe9ef2ce']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c7d5a3ccb140c32c",
   "metadata": {},
   "source": [
    "### sku_ID"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "3074f8cca6154a61",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:41:49.298701100Z",
     "start_time": "2024-03-12T12:41:49.217342600Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_ID</th>\n",
       "      <th>user_ID</th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>order_date</th>\n",
       "      <th>order_time</th>\n",
       "      <th>quantity</th>\n",
       "      <th>type</th>\n",
       "      <th>promise</th>\n",
       "      <th>original_unit_price</th>\n",
       "      <th>final_unit_price</th>\n",
       "      <th>direct_discount_per_unit</th>\n",
       "      <th>quantity_discount_per_unit</th>\n",
       "      <th>bundle_discount_per_unit</th>\n",
       "      <th>coupon_discount_per_unit</th>\n",
       "      <th>gift_item</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>e0f5386d87</td>\n",
       "      <td>0b07cae293</td>\n",
       "      <td>589c2b865b</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 21:09:15.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>79.9</td>\n",
       "      <td>38.9</td>\n",
       "      <td>4.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>3eb681f13b</td>\n",
       "      <td>5ccd5438ea</td>\n",
       "      <td>589c2b865b</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 17:14:45.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>79.9</td>\n",
       "      <td>36.9</td>\n",
       "      <td>4.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>126</th>\n",
       "      <td>b28c981a51</td>\n",
       "      <td>56932e545d</td>\n",
       "      <td>589c2b865b</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 10:50:05.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>79.9</td>\n",
       "      <td>38.9</td>\n",
       "      <td>4.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>136</th>\n",
       "      <td>8f603304b9</td>\n",
       "      <td>77cd0a609d</td>\n",
       "      <td>589c2b865b</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 15:50:41.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>79.9</td>\n",
       "      <td>39.9</td>\n",
       "      <td>4.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>381</th>\n",
       "      <td>b642a3fbc3</td>\n",
       "      <td>05d6b412e1</td>\n",
       "      <td>589c2b865b</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 16:34:31.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>79.9</td>\n",
       "      <td>18.9</td>\n",
       "      <td>4.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>548620</th>\n",
       "      <td>ff73205010</td>\n",
       "      <td>6851c7514b</td>\n",
       "      <td>589c2b865b</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 17:48:37.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>79.9</td>\n",
       "      <td>69.9</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>548924</th>\n",
       "      <td>9fe6e5045f</td>\n",
       "      <td>1b2f0a1bb6</td>\n",
       "      <td>589c2b865b</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 22:32:30.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>79.9</td>\n",
       "      <td>69.9</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>548950</th>\n",
       "      <td>e571ee59e2</td>\n",
       "      <td>0f5ee5bd2b</td>\n",
       "      <td>589c2b865b</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 23:01:35.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>79.9</td>\n",
       "      <td>69.9</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>549129</th>\n",
       "      <td>93ff2dcdf3</td>\n",
       "      <td>c7d2a39381</td>\n",
       "      <td>589c2b865b</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 21:19:33.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>79.9</td>\n",
       "      <td>69.9</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>549400</th>\n",
       "      <td>2fef0bd08c</td>\n",
       "      <td>0b8c13a472</td>\n",
       "      <td>589c2b865b</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 10:06:03.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>79.9</td>\n",
       "      <td>69.9</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5245 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          order_ID     user_ID      sku_ID  order_date             order_time  \\\n",
       "5       e0f5386d87  0b07cae293  589c2b865b  2018-03-01  2018-03-01 21:09:15.0   \n",
       "49      3eb681f13b  5ccd5438ea  589c2b865b  2018-03-01  2018-03-01 17:14:45.0   \n",
       "126     b28c981a51  56932e545d  589c2b865b  2018-03-01  2018-03-01 10:50:05.0   \n",
       "136     8f603304b9  77cd0a609d  589c2b865b  2018-03-01  2018-03-01 15:50:41.0   \n",
       "381     b642a3fbc3  05d6b412e1  589c2b865b  2018-03-01  2018-03-01 16:34:31.0   \n",
       "...            ...         ...         ...         ...                    ...   \n",
       "548620  ff73205010  6851c7514b  589c2b865b  2018-03-31  2018-03-31 17:48:37.0   \n",
       "548924  9fe6e5045f  1b2f0a1bb6  589c2b865b  2018-03-31  2018-03-31 22:32:30.0   \n",
       "548950  e571ee59e2  0f5ee5bd2b  589c2b865b  2018-03-31  2018-03-31 23:01:35.0   \n",
       "549129  93ff2dcdf3  c7d2a39381  589c2b865b  2018-03-31  2018-03-31 21:19:33.0   \n",
       "549400  2fef0bd08c  0b8c13a472  589c2b865b  2018-03-31  2018-03-31 10:06:03.0   \n",
       "\n",
       "        quantity  type promise  original_unit_price  final_unit_price  \\\n",
       "5              1     1       1                 79.9              38.9   \n",
       "49             1     1       2                 79.9              36.9   \n",
       "126            1     1       2                 79.9              38.9   \n",
       "136            1     1       2                 79.9              39.9   \n",
       "381            1     1       2                 79.9              18.9   \n",
       "...          ...   ...     ...                  ...               ...   \n",
       "548620         1     1       2                 79.9              69.9   \n",
       "548924         1     1       2                 79.9              69.9   \n",
       "548950         1     1       1                 79.9              69.9   \n",
       "549129         1     1       1                 79.9              69.9   \n",
       "549400         1     1       2                 79.9              69.9   \n",
       "\n",
       "        direct_discount_per_unit  quantity_discount_per_unit  \\\n",
       "5                            4.0                        37.0   \n",
       "49                           4.0                        37.0   \n",
       "126                          4.0                        37.0   \n",
       "136                          4.0                        36.0   \n",
       "381                          4.0                        37.0   \n",
       "...                          ...                         ...   \n",
       "548620                      10.0                         0.0   \n",
       "548924                      10.0                         0.0   \n",
       "548950                      10.0                         0.0   \n",
       "549129                      10.0                         0.0   \n",
       "549400                      10.0                         0.0   \n",
       "\n",
       "        bundle_discount_per_unit  coupon_discount_per_unit  gift_item  \n",
       "5                            0.0                       0.0          0  \n",
       "49                           0.0                       2.0          0  \n",
       "126                          0.0                       0.0          0  \n",
       "136                          0.0                       0.0          0  \n",
       "381                          0.0                      20.0          0  \n",
       "...                          ...                       ...        ...  \n",
       "548620                       0.0                       0.0          0  \n",
       "548924                       0.0                       0.0          0  \n",
       "548950                       0.0                       0.0          0  \n",
       "549129                       0.0                       0.0          0  \n",
       "549400                       0.0                       0.0          0  \n",
       "\n",
       "[5245 rows x 15 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "orders[orders['sku_ID'] == '589c2b865b']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "6165dc38c9cef7a7",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:41:49.944603400Z",
     "start_time": "2024-03-12T12:41:49.263756300Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>user_ID</th>\n",
       "      <th>request_time</th>\n",
       "      <th>channel</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>70277</th>\n",
       "      <td>589c2b865b</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-01 10:46:46</td>\n",
       "      <td>wechat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70278</th>\n",
       "      <td>589c2b865b</td>\n",
       "      <td>4a8d9c1653</td>\n",
       "      <td>2018-03-01 00:24:21</td>\n",
       "      <td>wechat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70279</th>\n",
       "      <td>589c2b865b</td>\n",
       "      <td>4a8d9c1653</td>\n",
       "      <td>2018-03-01 00:25:19</td>\n",
       "      <td>wechat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70280</th>\n",
       "      <td>589c2b865b</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-01 17:53:43</td>\n",
       "      <td>wechat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70281</th>\n",
       "      <td>589c2b865b</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-01 17:54:55</td>\n",
       "      <td>wechat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20203764</th>\n",
       "      <td>589c2b865b</td>\n",
       "      <td>0da34a78fc</td>\n",
       "      <td>2018-03-31 16:39:22</td>\n",
       "      <td>pc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20203765</th>\n",
       "      <td>589c2b865b</td>\n",
       "      <td>c6cc1738c6</td>\n",
       "      <td>2018-03-31 17:52:11</td>\n",
       "      <td>pc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20203766</th>\n",
       "      <td>589c2b865b</td>\n",
       "      <td>86e5aaeebb</td>\n",
       "      <td>2018-03-31 20:38:29</td>\n",
       "      <td>pc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20203767</th>\n",
       "      <td>589c2b865b</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 22:28:39</td>\n",
       "      <td>pc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20203768</th>\n",
       "      <td>589c2b865b</td>\n",
       "      <td>f88866cb51</td>\n",
       "      <td>2018-03-31 18:48:24</td>\n",
       "      <td>pc</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>138467 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              sku_ID     user_ID         request_time channel\n",
       "70277     589c2b865b         NaN  2018-03-01 10:46:46  wechat\n",
       "70278     589c2b865b  4a8d9c1653  2018-03-01 00:24:21  wechat\n",
       "70279     589c2b865b  4a8d9c1653  2018-03-01 00:25:19  wechat\n",
       "70280     589c2b865b         NaN  2018-03-01 17:53:43  wechat\n",
       "70281     589c2b865b         NaN  2018-03-01 17:54:55  wechat\n",
       "...              ...         ...                  ...     ...\n",
       "20203764  589c2b865b  0da34a78fc  2018-03-31 16:39:22      pc\n",
       "20203765  589c2b865b  c6cc1738c6  2018-03-31 17:52:11      pc\n",
       "20203766  589c2b865b  86e5aaeebb  2018-03-31 20:38:29      pc\n",
       "20203767  589c2b865b         NaN  2018-03-31 22:28:39      pc\n",
       "20203768  589c2b865b  f88866cb51  2018-03-31 18:48:24      pc\n",
       "\n",
       "[138467 rows x 4 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clicks[clicks['sku_ID'] == '589c2b865b']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "42583023382de3b1",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:41:50.067095200Z",
     "start_time": "2024-03-12T12:41:49.936934300Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>user_ID</th>\n",
       "      <th>request_time</th>\n",
       "      <th>channel</th>\n",
       "      <th>order_ID</th>\n",
       "      <th>order_date</th>\n",
       "      <th>order_time</th>\n",
       "      <th>quantity</th>\n",
       "      <th>type</th>\n",
       "      <th>promise</th>\n",
       "      <th>...</th>\n",
       "      <th>gift_item</th>\n",
       "      <th>user_level</th>\n",
       "      <th>first_order_month</th>\n",
       "      <th>plus</th>\n",
       "      <th>gender</th>\n",
       "      <th>age</th>\n",
       "      <th>marital_status</th>\n",
       "      <th>education</th>\n",
       "      <th>city_level</th>\n",
       "      <th>purchase_power</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>67</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>f84be0fe5c</td>\n",
       "      <td>2018-03-01 10:47:02</td>\n",
       "      <td>wechat</td>\n",
       "      <td>c73133ac85</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 10:53:23.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2015-07</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>26-35</td>\n",
       "      <td>M</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>68</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>f84be0fe5c</td>\n",
       "      <td>2018-03-01 10:52:00</td>\n",
       "      <td>wechat</td>\n",
       "      <td>c73133ac85</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 10:53:23.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2015-07</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>26-35</td>\n",
       "      <td>M</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>69</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>f84be0fe5c</td>\n",
       "      <td>2018-03-01 10:50:27</td>\n",
       "      <td>wechat</td>\n",
       "      <td>c73133ac85</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 10:53:23.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2015-07</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>26-35</td>\n",
       "      <td>M</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>f84be0fe5c</td>\n",
       "      <td>2018-03-01 10:46:38</td>\n",
       "      <td>wechat</td>\n",
       "      <td>c73133ac85</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 10:53:23.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2015-07</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>26-35</td>\n",
       "      <td>M</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>f84be0fe5c</td>\n",
       "      <td>2018-03-01 10:53:49</td>\n",
       "      <td>wechat</td>\n",
       "      <td>c73133ac85</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 10:53:23.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2015-07</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>26-35</td>\n",
       "      <td>M</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2598529</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>6928cc84d1</td>\n",
       "      <td>2018-03-31 12:14:20</td>\n",
       "      <td>app</td>\n",
       "      <td>daa16bc43d</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 12:18:56.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2012-11</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>36-45</td>\n",
       "      <td>M</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2598530</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>6928cc84d1</td>\n",
       "      <td>2018-03-31 12:11:08</td>\n",
       "      <td>app</td>\n",
       "      <td>daa16bc43d</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 12:18:56.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2012-11</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>36-45</td>\n",
       "      <td>M</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2598531</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>6928cc84d1</td>\n",
       "      <td>2018-03-31 12:13:07</td>\n",
       "      <td>app</td>\n",
       "      <td>daa16bc43d</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 12:18:56.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2012-11</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>36-45</td>\n",
       "      <td>M</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2598532</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>6928cc84d1</td>\n",
       "      <td>2018-03-31 12:16:45</td>\n",
       "      <td>app</td>\n",
       "      <td>daa16bc43d</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 12:18:56.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2012-11</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>36-45</td>\n",
       "      <td>M</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2598533</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>6928cc84d1</td>\n",
       "      <td>2018-03-31 12:17:19</td>\n",
       "      <td>app</td>\n",
       "      <td>daa16bc43d</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>2018-03-31 12:18:56.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2012-11</td>\n",
       "      <td>0</td>\n",
       "      <td>F</td>\n",
       "      <td>36-45</td>\n",
       "      <td>M</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10375 rows × 26 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             sku_ID     user_ID         request_time channel    order_ID  \\\n",
       "67       581d5b54c1  f84be0fe5c  2018-03-01 10:47:02  wechat  c73133ac85   \n",
       "68       581d5b54c1  f84be0fe5c  2018-03-01 10:52:00  wechat  c73133ac85   \n",
       "69       581d5b54c1  f84be0fe5c  2018-03-01 10:50:27  wechat  c73133ac85   \n",
       "70       581d5b54c1  f84be0fe5c  2018-03-01 10:46:38  wechat  c73133ac85   \n",
       "71       581d5b54c1  f84be0fe5c  2018-03-01 10:53:49  wechat  c73133ac85   \n",
       "...             ...         ...                  ...     ...         ...   \n",
       "2598529  581d5b54c1  6928cc84d1  2018-03-31 12:14:20     app  daa16bc43d   \n",
       "2598530  581d5b54c1  6928cc84d1  2018-03-31 12:11:08     app  daa16bc43d   \n",
       "2598531  581d5b54c1  6928cc84d1  2018-03-31 12:13:07     app  daa16bc43d   \n",
       "2598532  581d5b54c1  6928cc84d1  2018-03-31 12:16:45     app  daa16bc43d   \n",
       "2598533  581d5b54c1  6928cc84d1  2018-03-31 12:17:19     app  daa16bc43d   \n",
       "\n",
       "         order_date             order_time  quantity  type promise  ...  \\\n",
       "67       2018-03-01  2018-03-01 10:53:23.0         1     2     NaN  ...   \n",
       "68       2018-03-01  2018-03-01 10:53:23.0         1     2     NaN  ...   \n",
       "69       2018-03-01  2018-03-01 10:53:23.0         1     2     NaN  ...   \n",
       "70       2018-03-01  2018-03-01 10:53:23.0         1     2     NaN  ...   \n",
       "71       2018-03-01  2018-03-01 10:53:23.0         1     2     NaN  ...   \n",
       "...             ...                    ...       ...   ...     ...  ...   \n",
       "2598529  2018-03-31  2018-03-31 12:18:56.0         1     2     NaN  ...   \n",
       "2598530  2018-03-31  2018-03-31 12:18:56.0         1     2     NaN  ...   \n",
       "2598531  2018-03-31  2018-03-31 12:18:56.0         1     2     NaN  ...   \n",
       "2598532  2018-03-31  2018-03-31 12:18:56.0         1     2     NaN  ...   \n",
       "2598533  2018-03-31  2018-03-31 12:18:56.0         1     2     NaN  ...   \n",
       "\n",
       "         gift_item  user_level  first_order_month  plus  gender    age  \\\n",
       "67               0           2            2015-07     0       F  26-35   \n",
       "68               0           2            2015-07     0       F  26-35   \n",
       "69               0           2            2015-07     0       F  26-35   \n",
       "70               0           2            2015-07     0       F  26-35   \n",
       "71               0           2            2015-07     0       F  26-35   \n",
       "...            ...         ...                ...   ...     ...    ...   \n",
       "2598529          0           2            2012-11     0       F  36-45   \n",
       "2598530          0           2            2012-11     0       F  36-45   \n",
       "2598531          0           2            2012-11     0       F  36-45   \n",
       "2598532          0           2            2012-11     0       F  36-45   \n",
       "2598533          0           2            2012-11     0       F  36-45   \n",
       "\n",
       "         marital_status  education city_level  purchase_power  \n",
       "67                    M          1          2               3  \n",
       "68                    M          1          2               3  \n",
       "69                    M          1          2               3  \n",
       "70                    M          1          2               3  \n",
       "71                    M          1          2               3  \n",
       "...                 ...        ...        ...             ...  \n",
       "2598529               M          3          1               2  \n",
       "2598530               M          3          1               2  \n",
       "2598531               M          3          1               2  \n",
       "2598532               M          3          1               2  \n",
       "2598533               M          3          1               2  \n",
       "\n",
       "[10375 rows x 26 columns]"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clor_us_in[clor_us_in['sku_ID'] == '581d5b54c1']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b9615856b6b0586b",
   "metadata": {},
   "source": [
    "## 外连--是否下单的点击都包含"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "fe18987f530c4be",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:41:59.089253200Z",
     "start_time": "2024-03-12T12:41:50.064094300Z"
    }
   },
   "outputs": [],
   "source": [
    "# 合并数据表，这里使用左连接\n",
    "clor_out = pd.merge(clicks, orders, on=['sku_ID', 'user_ID'], how='left')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "38d10d5b050af2aa",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:41:59.105835900Z",
     "start_time": "2024-03-12T12:41:59.090265800Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>user_ID</th>\n",
       "      <th>request_time</th>\n",
       "      <th>channel</th>\n",
       "      <th>order_ID</th>\n",
       "      <th>order_date</th>\n",
       "      <th>order_time</th>\n",
       "      <th>quantity</th>\n",
       "      <th>type</th>\n",
       "      <th>promise</th>\n",
       "      <th>original_unit_price</th>\n",
       "      <th>final_unit_price</th>\n",
       "      <th>direct_discount_per_unit</th>\n",
       "      <th>quantity_discount_per_unit</th>\n",
       "      <th>bundle_discount_per_unit</th>\n",
       "      <th>coupon_discount_per_unit</th>\n",
       "      <th>gift_item</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>a234e08c57</td>\n",
       "      <td>4c3d6d10c2</td>\n",
       "      <td>2018-03-01 23:57:53</td>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6449e1fd87</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-01 16:13:48</td>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>2791ec4485</td>\n",
       "      <td>2018-03-01 22:10:51</td>\n",
       "      <td>wechat</td>\n",
       "      <td>e4874e2a00</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 14:08:33.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>88.0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>39.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>eb0718c1c9</td>\n",
       "      <td>2018-03-01 16:34:08</td>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>59f84cf342</td>\n",
       "      <td>2018-03-01 22:20:35</td>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20321452</th>\n",
       "      <td>a8a96e022a</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 21:45:07</td>\n",
       "      <td>others</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20321453</th>\n",
       "      <td>eb3f2d2fd8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 11:31:11</td>\n",
       "      <td>others</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20321454</th>\n",
       "      <td>fbce41fd82</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 11:31:08</td>\n",
       "      <td>others</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20321455</th>\n",
       "      <td>fbce41fd82</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 19:28:25</td>\n",
       "      <td>others</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20321456</th>\n",
       "      <td>87b853b910</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 06:29:47</td>\n",
       "      <td>others</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>20321457 rows × 17 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              sku_ID     user_ID         request_time channel    order_ID  \\\n",
       "0         a234e08c57  4c3d6d10c2  2018-03-01 23:57:53  wechat         NaN   \n",
       "1         6449e1fd87         NaN  2018-03-01 16:13:48  wechat         NaN   \n",
       "2         09b70fcd83  2791ec4485  2018-03-01 22:10:51  wechat  e4874e2a00   \n",
       "3         09b70fcd83  eb0718c1c9  2018-03-01 16:34:08  wechat         NaN   \n",
       "4         09b70fcd83  59f84cf342  2018-03-01 22:20:35  wechat         NaN   \n",
       "...              ...         ...                  ...     ...         ...   \n",
       "20321452  a8a96e022a         NaN  2018-03-31 21:45:07  others         NaN   \n",
       "20321453  eb3f2d2fd8         NaN  2018-03-31 11:31:11  others         NaN   \n",
       "20321454  fbce41fd82         NaN  2018-03-31 11:31:08  others         NaN   \n",
       "20321455  fbce41fd82         NaN  2018-03-31 19:28:25  others         NaN   \n",
       "20321456  87b853b910         NaN  2018-03-31 06:29:47  others         NaN   \n",
       "\n",
       "          order_date             order_time  quantity  type promise  \\\n",
       "0                NaN                    NaN       NaN   NaN     NaN   \n",
       "1                NaN                    NaN       NaN   NaN     NaN   \n",
       "2         2018-03-01  2018-03-01 14:08:33.0       1.0   2.0     NaN   \n",
       "3                NaN                    NaN       NaN   NaN     NaN   \n",
       "4                NaN                    NaN       NaN   NaN     NaN   \n",
       "...              ...                    ...       ...   ...     ...   \n",
       "20321452         NaN                    NaN       NaN   NaN     NaN   \n",
       "20321453         NaN                    NaN       NaN   NaN     NaN   \n",
       "20321454         NaN                    NaN       NaN   NaN     NaN   \n",
       "20321455         NaN                    NaN       NaN   NaN     NaN   \n",
       "20321456         NaN                    NaN       NaN   NaN     NaN   \n",
       "\n",
       "          original_unit_price  final_unit_price  direct_discount_per_unit  \\\n",
       "0                         NaN               NaN                       NaN   \n",
       "1                         NaN               NaN                       NaN   \n",
       "2                        88.0              49.0                      39.0   \n",
       "3                         NaN               NaN                       NaN   \n",
       "4                         NaN               NaN                       NaN   \n",
       "...                       ...               ...                       ...   \n",
       "20321452                  NaN               NaN                       NaN   \n",
       "20321453                  NaN               NaN                       NaN   \n",
       "20321454                  NaN               NaN                       NaN   \n",
       "20321455                  NaN               NaN                       NaN   \n",
       "20321456                  NaN               NaN                       NaN   \n",
       "\n",
       "          quantity_discount_per_unit  bundle_discount_per_unit  \\\n",
       "0                                NaN                       NaN   \n",
       "1                                NaN                       NaN   \n",
       "2                                0.0                       0.0   \n",
       "3                                NaN                       NaN   \n",
       "4                                NaN                       NaN   \n",
       "...                              ...                       ...   \n",
       "20321452                         NaN                       NaN   \n",
       "20321453                         NaN                       NaN   \n",
       "20321454                         NaN                       NaN   \n",
       "20321455                         NaN                       NaN   \n",
       "20321456                         NaN                       NaN   \n",
       "\n",
       "          coupon_discount_per_unit  gift_item  \n",
       "0                              NaN        NaN  \n",
       "1                              NaN        NaN  \n",
       "2                              0.0        0.0  \n",
       "3                              NaN        NaN  \n",
       "4                              NaN        NaN  \n",
       "...                            ...        ...  \n",
       "20321452                       NaN        NaN  \n",
       "20321453                       NaN        NaN  \n",
       "20321454                       NaN        NaN  \n",
       "20321455                       NaN        NaN  \n",
       "20321456                       NaN        NaN  \n",
       "\n",
       "[20321457 rows x 17 columns]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clor_out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "f6a9298b86bea14d",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:42:08.032712400Z",
     "start_time": "2024-03-12T12:41:59.105835900Z"
    }
   },
   "outputs": [],
   "source": [
    "clor_us_out = pd.merge(clor_out, users, on=['user_ID'], how='left')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "c6fdc4162f6a4525",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:42:15.882274700Z",
     "start_time": "2024-03-12T12:42:08.033765900Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>user_ID</th>\n",
       "      <th>request_time</th>\n",
       "      <th>channel</th>\n",
       "      <th>order_ID</th>\n",
       "      <th>order_date</th>\n",
       "      <th>order_time</th>\n",
       "      <th>quantity</th>\n",
       "      <th>type</th>\n",
       "      <th>promise</th>\n",
       "      <th>...</th>\n",
       "      <th>gift_item</th>\n",
       "      <th>user_level</th>\n",
       "      <th>first_order_month</th>\n",
       "      <th>plus</th>\n",
       "      <th>gender</th>\n",
       "      <th>age</th>\n",
       "      <th>marital_status</th>\n",
       "      <th>education</th>\n",
       "      <th>city_level</th>\n",
       "      <th>purchase_power</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>a234e08c57</td>\n",
       "      <td>4c3d6d10c2</td>\n",
       "      <td>2018-03-01 23:57:53</td>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6449e1fd87</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-01 16:13:48</td>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>2791ec4485</td>\n",
       "      <td>2018-03-01 22:10:51</td>\n",
       "      <td>wechat</td>\n",
       "      <td>e4874e2a00</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 14:08:33.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2018-02</td>\n",
       "      <td>0.0</td>\n",
       "      <td>F</td>\n",
       "      <td>26-35</td>\n",
       "      <td>M</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>eb0718c1c9</td>\n",
       "      <td>2018-03-01 16:34:08</td>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>59f84cf342</td>\n",
       "      <td>2018-03-01 22:20:35</td>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20321452</th>\n",
       "      <td>a8a96e022a</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 21:45:07</td>\n",
       "      <td>others</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20321453</th>\n",
       "      <td>eb3f2d2fd8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 11:31:11</td>\n",
       "      <td>others</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20321454</th>\n",
       "      <td>fbce41fd82</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 11:31:08</td>\n",
       "      <td>others</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20321455</th>\n",
       "      <td>fbce41fd82</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 19:28:25</td>\n",
       "      <td>others</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20321456</th>\n",
       "      <td>87b853b910</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 06:29:47</td>\n",
       "      <td>others</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>20321457 rows × 26 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              sku_ID     user_ID         request_time channel    order_ID  \\\n",
       "0         a234e08c57  4c3d6d10c2  2018-03-01 23:57:53  wechat         NaN   \n",
       "1         6449e1fd87         NaN  2018-03-01 16:13:48  wechat         NaN   \n",
       "2         09b70fcd83  2791ec4485  2018-03-01 22:10:51  wechat  e4874e2a00   \n",
       "3         09b70fcd83  eb0718c1c9  2018-03-01 16:34:08  wechat         NaN   \n",
       "4         09b70fcd83  59f84cf342  2018-03-01 22:20:35  wechat         NaN   \n",
       "...              ...         ...                  ...     ...         ...   \n",
       "20321452  a8a96e022a         NaN  2018-03-31 21:45:07  others         NaN   \n",
       "20321453  eb3f2d2fd8         NaN  2018-03-31 11:31:11  others         NaN   \n",
       "20321454  fbce41fd82         NaN  2018-03-31 11:31:08  others         NaN   \n",
       "20321455  fbce41fd82         NaN  2018-03-31 19:28:25  others         NaN   \n",
       "20321456  87b853b910         NaN  2018-03-31 06:29:47  others         NaN   \n",
       "\n",
       "          order_date             order_time  quantity  type promise  ...  \\\n",
       "0                NaN                    NaN       NaN   NaN     NaN  ...   \n",
       "1                NaN                    NaN       NaN   NaN     NaN  ...   \n",
       "2         2018-03-01  2018-03-01 14:08:33.0       1.0   2.0     NaN  ...   \n",
       "3                NaN                    NaN       NaN   NaN     NaN  ...   \n",
       "4                NaN                    NaN       NaN   NaN     NaN  ...   \n",
       "...              ...                    ...       ...   ...     ...  ...   \n",
       "20321452         NaN                    NaN       NaN   NaN     NaN  ...   \n",
       "20321453         NaN                    NaN       NaN   NaN     NaN  ...   \n",
       "20321454         NaN                    NaN       NaN   NaN     NaN  ...   \n",
       "20321455         NaN                    NaN       NaN   NaN     NaN  ...   \n",
       "20321456         NaN                    NaN       NaN   NaN     NaN  ...   \n",
       "\n",
       "          gift_item  user_level  first_order_month  plus  gender    age  \\\n",
       "0               NaN         NaN                NaN   NaN     NaN    NaN   \n",
       "1               NaN         NaN                NaN   NaN     NaN    NaN   \n",
       "2               0.0         1.0            2018-02   0.0       F  26-35   \n",
       "3               NaN         NaN                NaN   NaN     NaN    NaN   \n",
       "4               NaN         NaN                NaN   NaN     NaN    NaN   \n",
       "...             ...         ...                ...   ...     ...    ...   \n",
       "20321452        NaN         NaN                NaN   NaN     NaN    NaN   \n",
       "20321453        NaN         NaN                NaN   NaN     NaN    NaN   \n",
       "20321454        NaN         NaN                NaN   NaN     NaN    NaN   \n",
       "20321455        NaN         NaN                NaN   NaN     NaN    NaN   \n",
       "20321456        NaN         NaN                NaN   NaN     NaN    NaN   \n",
       "\n",
       "          marital_status  education city_level  purchase_power  \n",
       "0                    NaN        NaN        NaN             NaN  \n",
       "1                    NaN        NaN        NaN             NaN  \n",
       "2                      M        2.0        3.0             3.0  \n",
       "3                    NaN        NaN        NaN             NaN  \n",
       "4                    NaN        NaN        NaN             NaN  \n",
       "...                  ...        ...        ...             ...  \n",
       "20321452             NaN        NaN        NaN             NaN  \n",
       "20321453             NaN        NaN        NaN             NaN  \n",
       "20321454             NaN        NaN        NaN             NaN  \n",
       "20321455             NaN        NaN        NaN             NaN  \n",
       "20321456             NaN        NaN        NaN             NaN  \n",
       "\n",
       "[20321457 rows x 26 columns]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clor_us_out"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e7ab06aee30fbb3e",
   "metadata": {},
   "source": [
    "### user_ID"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "229fc5ad1dead3ba",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:42:19.009903500Z",
     "start_time": "2024-03-12T12:42:16.540969200Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>user_ID</th>\n",
       "      <th>request_time</th>\n",
       "      <th>channel</th>\n",
       "      <th>order_ID</th>\n",
       "      <th>order_date</th>\n",
       "      <th>order_time</th>\n",
       "      <th>quantity</th>\n",
       "      <th>type</th>\n",
       "      <th>promise</th>\n",
       "      <th>original_unit_price</th>\n",
       "      <th>final_unit_price</th>\n",
       "      <th>direct_discount_per_unit</th>\n",
       "      <th>quantity_discount_per_unit</th>\n",
       "      <th>bundle_discount_per_unit</th>\n",
       "      <th>coupon_discount_per_unit</th>\n",
       "      <th>gift_item</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>69676</th>\n",
       "      <td>97a6e047ca</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>2018-03-01 16:09:43</td>\n",
       "      <td>mobile</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75864</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>2018-03-01 17:09:41</td>\n",
       "      <td>mobile</td>\n",
       "      <td>d0cf5cc6db</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 17:14:25.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>89.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75907</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>2018-03-01 17:07:02</td>\n",
       "      <td>mobile</td>\n",
       "      <td>d0cf5cc6db</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 17:14:25.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>89.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>217474</th>\n",
       "      <td>01c9d91829</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>2018-03-01 16:13:49</td>\n",
       "      <td>mobile</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>253823</th>\n",
       "      <td>9f7388280e</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>2018-03-01 16:09:06</td>\n",
       "      <td>mobile</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>259819</th>\n",
       "      <td>d0f488ea78</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>2018-03-01 17:03:53</td>\n",
       "      <td>mobile</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>259843</th>\n",
       "      <td>d0f488ea78</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>2018-03-01 17:04:34</td>\n",
       "      <td>mobile</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>356603</th>\n",
       "      <td>989aa91456</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>2018-03-01 16:12:26</td>\n",
       "      <td>mobile</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2392643</th>\n",
       "      <td>6717b7c979</td>\n",
       "      <td>0abe9ef2ce</td>\n",
       "      <td>2018-03-04 21:55:55</td>\n",
       "      <td>mobile</td>\n",
       "      <td>9d74489696</td>\n",
       "      <td>2018-03-04</td>\n",
       "      <td>2018-03-04 21:52:33.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             sku_ID     user_ID         request_time channel    order_ID  \\\n",
       "69676    97a6e047ca  0abe9ef2ce  2018-03-01 16:09:43  mobile         NaN   \n",
       "75864    581d5b54c1  0abe9ef2ce  2018-03-01 17:09:41  mobile  d0cf5cc6db   \n",
       "75907    581d5b54c1  0abe9ef2ce  2018-03-01 17:07:02  mobile  d0cf5cc6db   \n",
       "217474   01c9d91829  0abe9ef2ce  2018-03-01 16:13:49  mobile         NaN   \n",
       "253823   9f7388280e  0abe9ef2ce  2018-03-01 16:09:06  mobile         NaN   \n",
       "259819   d0f488ea78  0abe9ef2ce  2018-03-01 17:03:53  mobile         NaN   \n",
       "259843   d0f488ea78  0abe9ef2ce  2018-03-01 17:04:34  mobile         NaN   \n",
       "356603   989aa91456  0abe9ef2ce  2018-03-01 16:12:26  mobile         NaN   \n",
       "2392643  6717b7c979  0abe9ef2ce  2018-03-04 21:55:55  mobile  9d74489696   \n",
       "\n",
       "         order_date             order_time  quantity  type promise  \\\n",
       "69676           NaN                    NaN       NaN   NaN     NaN   \n",
       "75864    2018-03-01  2018-03-01 17:14:25.0       1.0   2.0     NaN   \n",
       "75907    2018-03-01  2018-03-01 17:14:25.0       1.0   2.0     NaN   \n",
       "217474          NaN                    NaN       NaN   NaN     NaN   \n",
       "253823          NaN                    NaN       NaN   NaN     NaN   \n",
       "259819          NaN                    NaN       NaN   NaN     NaN   \n",
       "259843          NaN                    NaN       NaN   NaN     NaN   \n",
       "356603          NaN                    NaN       NaN   NaN     NaN   \n",
       "2392643  2018-03-04  2018-03-04 21:52:33.0       1.0   1.0       2   \n",
       "\n",
       "         original_unit_price  final_unit_price  direct_discount_per_unit  \\\n",
       "69676                    NaN               NaN                       NaN   \n",
       "75864                   89.0              79.0                       0.0   \n",
       "75907                   89.0              79.0                       0.0   \n",
       "217474                   NaN               NaN                       NaN   \n",
       "253823                   NaN               NaN                       NaN   \n",
       "259819                   NaN               NaN                       NaN   \n",
       "259843                   NaN               NaN                       NaN   \n",
       "356603                   NaN               NaN                       NaN   \n",
       "2392643                  0.0               0.0                       0.0   \n",
       "\n",
       "         quantity_discount_per_unit  bundle_discount_per_unit  \\\n",
       "69676                           NaN                       NaN   \n",
       "75864                          10.0                       0.0   \n",
       "75907                          10.0                       0.0   \n",
       "217474                          NaN                       NaN   \n",
       "253823                          NaN                       NaN   \n",
       "259819                          NaN                       NaN   \n",
       "259843                          NaN                       NaN   \n",
       "356603                          NaN                       NaN   \n",
       "2392643                         0.0                       0.0   \n",
       "\n",
       "         coupon_discount_per_unit  gift_item  \n",
       "69676                         NaN        NaN  \n",
       "75864                         0.0        0.0  \n",
       "75907                         0.0        0.0  \n",
       "217474                        NaN        NaN  \n",
       "253823                        NaN        NaN  \n",
       "259819                        NaN        NaN  \n",
       "259843                        NaN        NaN  \n",
       "356603                        NaN        NaN  \n",
       "2392643                       0.0        1.0  "
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect every row of clor_out (the outer-joined frame, per the original note)\n",
    "# for one user_ID. The mask is built from clor_out itself so the filter does\n",
    "# not depend on clor_us_out happening to share the same index and row order.\n",
    "clor_out[clor_out['user_ID'] == '0abe9ef2ce']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5688906feac5a72e",
   "metadata": {},
   "source": [
    "### sku_ID"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "14ac999806abb72c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:42:19.726872100Z",
     "start_time": "2024-03-12T12:42:19.657268900Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>user_ID</th>\n",
       "      <th>request_time</th>\n",
       "      <th>channel</th>\n",
       "      <th>order_ID</th>\n",
       "      <th>order_date</th>\n",
       "      <th>order_time</th>\n",
       "      <th>quantity</th>\n",
       "      <th>type</th>\n",
       "      <th>promise</th>\n",
       "      <th>original_unit_price</th>\n",
       "      <th>final_unit_price</th>\n",
       "      <th>direct_discount_per_unit</th>\n",
       "      <th>quantity_discount_per_unit</th>\n",
       "      <th>bundle_discount_per_unit</th>\n",
       "      <th>coupon_discount_per_unit</th>\n",
       "      <th>gift_item</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>300</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>9a71d488b4</td>\n",
       "      <td>2018-03-01 13:06:49</td>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>301</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-01 00:55:47</td>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>302</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-01 01:09:35</td>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>303</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-01 01:09:25</td>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>304</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-01 23:55:53</td>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20260215</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>1bd5de3b3b</td>\n",
       "      <td>2018-03-31 14:51:33</td>\n",
       "      <td>pc</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20260216</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 11:52:06</td>\n",
       "      <td>pc</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20260217</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 20:20:50</td>\n",
       "      <td>pc</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20260218</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 14:47:51</td>\n",
       "      <td>pc</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20260219</th>\n",
       "      <td>581d5b54c1</td>\n",
       "      <td>12caeab56d</td>\n",
       "      <td>2018-03-31 13:34:58</td>\n",
       "      <td>pc</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>108306 rows × 17 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              sku_ID     user_ID         request_time channel order_ID  \\\n",
       "300       581d5b54c1  9a71d488b4  2018-03-01 13:06:49  wechat      NaN   \n",
       "301       581d5b54c1         NaN  2018-03-01 00:55:47  wechat      NaN   \n",
       "302       581d5b54c1         NaN  2018-03-01 01:09:35  wechat      NaN   \n",
       "303       581d5b54c1         NaN  2018-03-01 01:09:25  wechat      NaN   \n",
       "304       581d5b54c1         NaN  2018-03-01 23:55:53  wechat      NaN   \n",
       "...              ...         ...                  ...     ...      ...   \n",
       "20260215  581d5b54c1  1bd5de3b3b  2018-03-31 14:51:33      pc      NaN   \n",
       "20260216  581d5b54c1         NaN  2018-03-31 11:52:06      pc      NaN   \n",
       "20260217  581d5b54c1         NaN  2018-03-31 20:20:50      pc      NaN   \n",
       "20260218  581d5b54c1         NaN  2018-03-31 14:47:51      pc      NaN   \n",
       "20260219  581d5b54c1  12caeab56d  2018-03-31 13:34:58      pc      NaN   \n",
       "\n",
       "         order_date order_time  quantity  type promise  original_unit_price  \\\n",
       "300             NaN        NaN       NaN   NaN     NaN                  NaN   \n",
       "301             NaN        NaN       NaN   NaN     NaN                  NaN   \n",
       "302             NaN        NaN       NaN   NaN     NaN                  NaN   \n",
       "303             NaN        NaN       NaN   NaN     NaN                  NaN   \n",
       "304             NaN        NaN       NaN   NaN     NaN                  NaN   \n",
       "...             ...        ...       ...   ...     ...                  ...   \n",
       "20260215        NaN        NaN       NaN   NaN     NaN                  NaN   \n",
       "20260216        NaN        NaN       NaN   NaN     NaN                  NaN   \n",
       "20260217        NaN        NaN       NaN   NaN     NaN                  NaN   \n",
       "20260218        NaN        NaN       NaN   NaN     NaN                  NaN   \n",
       "20260219        NaN        NaN       NaN   NaN     NaN                  NaN   \n",
       "\n",
       "          final_unit_price  direct_discount_per_unit  \\\n",
       "300                    NaN                       NaN   \n",
       "301                    NaN                       NaN   \n",
       "302                    NaN                       NaN   \n",
       "303                    NaN                       NaN   \n",
       "304                    NaN                       NaN   \n",
       "...                    ...                       ...   \n",
       "20260215               NaN                       NaN   \n",
       "20260216               NaN                       NaN   \n",
       "20260217               NaN                       NaN   \n",
       "20260218               NaN                       NaN   \n",
       "20260219               NaN                       NaN   \n",
       "\n",
       "          quantity_discount_per_unit  bundle_discount_per_unit  \\\n",
       "300                              NaN                       NaN   \n",
       "301                              NaN                       NaN   \n",
       "302                              NaN                       NaN   \n",
       "303                              NaN                       NaN   \n",
       "304                              NaN                       NaN   \n",
       "...                              ...                       ...   \n",
       "20260215                         NaN                       NaN   \n",
       "20260216                         NaN                       NaN   \n",
       "20260217                         NaN                       NaN   \n",
       "20260218                         NaN                       NaN   \n",
       "20260219                         NaN                       NaN   \n",
       "\n",
       "          coupon_discount_per_unit  gift_item  \n",
       "300                            NaN        NaN  \n",
       "301                            NaN        NaN  \n",
       "302                            NaN        NaN  \n",
       "303                            NaN        NaN  \n",
       "304                            NaN        NaN  \n",
       "...                            ...        ...  \n",
       "20260215                       NaN        NaN  \n",
       "20260216                       NaN        NaN  \n",
       "20260217                       NaN        NaN  \n",
       "20260218                       NaN        NaN  \n",
       "20260219                       NaN        NaN  \n",
       "\n",
       "[108306 rows x 17 columns]"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the click and order rows of clor_out for one sku_ID.\n",
    "# The mask is built from clor_out itself so the filter does not depend on\n",
    "# clor_us_out happening to share the same index and row order.\n",
    "clor_out[clor_out['sku_ID'] == '581d5b54c1']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7b3763bd6641cc87",
   "metadata": {},
   "source": [
    "# task 1 点击到订单的转化率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "71c9fa6800064eff",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:42:20.139949800Z",
     "start_time": "2024-03-12T12:42:19.721867700Z"
    }
   },
   "outputs": [],
   "source": [
    "# 1 where the row carries a request_time value, 0 where it is missing.\n",
    "clor_us_out['is_click'] = clor_us_out['request_time'].notna().astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "ffeeac95a086a46c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:42:20.599907800Z",
     "start_time": "2024-03-12T12:42:20.139949800Z"
    }
   },
   "outputs": [],
   "source": [
    "# 1 where the row carries an order_time value, 0 where it is missing.\n",
    "clor_us_out['is_order'] = clor_us_out['order_time'].notna().astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "73e03d79a4468f10",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:42:21.627884900Z",
     "start_time": "2024-03-12T12:42:20.574761100Z"
    }
   },
   "outputs": [],
   "source": [
    "# Per-SKU totals of the is_click and is_order flags, with sku_ID kept as a\n",
    "# regular column rather than the index.\n",
    "sku_metrics = (\n",
    "    clor_us_out\n",
    "    .groupby('sku_ID', as_index=False)\n",
    "    .agg(is_click=('is_click', 'sum'), is_order=('is_order', 'sum'))\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "ed25f65b1880612e",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:42:21.643032300Z",
     "start_time": "2024-03-12T12:42:21.628891600Z"
    }
   },
   "outputs": [],
   "source": [
    "# Order-to-click ratio per SKU. SKUs with zero clicks get NaN instead of a\n",
    "# division by zero. np.nan is used rather than pd.NA so the denominator is\n",
    "# upcast to float64 and the ratio column stays numeric (pd.NA would turn the\n",
    "# int column into object dtype).\n",
    "sku_metrics['order_click_ratio'] = sku_metrics['is_order'] / sku_metrics['is_click'].replace(0, np.nan)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "d04f2c90b67b91ac",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:42:21.664900500Z",
     "start_time": "2024-03-12T12:42:21.644045Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>is_click</th>\n",
       "      <th>is_order</th>\n",
       "      <th>order_click_ratio</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>000161cd1b</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>00078c2a0f</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0009ac56b7</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>000aa92b82</td>\n",
       "      <td>666</td>\n",
       "      <td>41</td>\n",
       "      <td>0.061562</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>000d4af39d</td>\n",
       "      <td>223</td>\n",
       "      <td>7</td>\n",
       "      <td>0.031390</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31862</th>\n",
       "      <td>fff84ed7ec</td>\n",
       "      <td>661</td>\n",
       "      <td>40</td>\n",
       "      <td>0.060514</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31863</th>\n",
       "      <td>fffaaabb99</td>\n",
       "      <td>316</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31864</th>\n",
       "      <td>fffe1bd280</td>\n",
       "      <td>27894</td>\n",
       "      <td>2609</td>\n",
       "      <td>0.093533</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31865</th>\n",
       "      <td>fffe6eb4df</td>\n",
       "      <td>922</td>\n",
       "      <td>47</td>\n",
       "      <td>0.050976</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31866</th>\n",
       "      <td>ffffd456aa</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>31867 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           sku_ID  is_click  is_order  order_click_ratio\n",
       "0      000161cd1b         9         0           0.000000\n",
       "1      00078c2a0f         9         0           0.000000\n",
       "2      0009ac56b7         9         0           0.000000\n",
       "3      000aa92b82       666        41           0.061562\n",
       "4      000d4af39d       223         7           0.031390\n",
       "...           ...       ...       ...                ...\n",
       "31862  fff84ed7ec       661        40           0.060514\n",
       "31863  fffaaabb99       316         0           0.000000\n",
       "31864  fffe1bd280     27894      2609           0.093533\n",
       "31865  fffe6eb4df       922        47           0.050976\n",
       "31866  ffffd456aa         6         0           0.000000\n",
       "\n",
       "[31867 rows x 4 columns]"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sku_metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "4d8c9a2e76647557",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T13:13:55.443466500Z",
     "start_time": "2024-03-12T13:13:55.409062800Z"
    }
   },
   "outputs": [],
   "source": [
    "# 'skus' table: product (SKU) information\n",
    "skus = pd.read_csv('./JD_data/JD_sku_data.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "606b1c6880de7b65",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T13:19:27.181499Z",
     "start_time": "2024-03-12T13:19:27.167449500Z"
    }
   },
   "outputs": [],
   "source": [
    "# In the two attribute columns, a '-' entry is replaced with NaN.\n",
    "skus[['attribute1', 'attribute2']] = (\n",
    "    skus[['attribute1', 'attribute2']].replace('-', np.nan)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "67899be672704854",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T13:19:35.202506100Z",
     "start_time": "2024-03-12T13:19:35.186417700Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>type</th>\n",
       "      <th>brand_ID</th>\n",
       "      <th>attribute1</th>\n",
       "      <th>attribute2</th>\n",
       "      <th>activate_date</th>\n",
       "      <th>deactivate_date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>a234e08c57</td>\n",
       "      <td>1</td>\n",
       "      <td>c3ab4bf4d9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>60.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6449e1fd87</td>\n",
       "      <td>1</td>\n",
       "      <td>1d8b4b4c63</td>\n",
       "      <td>2.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>2</td>\n",
       "      <td>eb7d2a675a</td>\n",
       "      <td>3.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>acad9fed04</td>\n",
       "      <td>2</td>\n",
       "      <td>9b0d3a5fc6</td>\n",
       "      <td>3.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2fa77e3b4d</td>\n",
       "      <td>2</td>\n",
       "      <td>b681299668</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31863</th>\n",
       "      <td>121d8470d2</td>\n",
       "      <td>2</td>\n",
       "      <td>3daeabd2ce</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-30</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31864</th>\n",
       "      <td>e41c62189d</td>\n",
       "      <td>2</td>\n",
       "      <td>8b40ec9ab7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31865</th>\n",
       "      <td>01d16f7678</td>\n",
       "      <td>2</td>\n",
       "      <td>e686890dbc</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-29</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31866</th>\n",
       "      <td>83fc55d93b</td>\n",
       "      <td>2</td>\n",
       "      <td>9d3465eacc</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-29</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31867</th>\n",
       "      <td>c1b1a4b058</td>\n",
       "      <td>2</td>\n",
       "      <td>65c76167e3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>31868 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           sku_ID  type    brand_ID attribute1 attribute2 activate_date  \\\n",
       "0      a234e08c57     1  c3ab4bf4d9        3.0       60.0           NaN   \n",
       "1      6449e1fd87     1  1d8b4b4c63        2.0       50.0           NaN   \n",
       "2      09b70fcd83     2  eb7d2a675a        3.0       70.0           NaN   \n",
       "3      acad9fed04     2  9b0d3a5fc6        3.0       70.0           NaN   \n",
       "4      2fa77e3b4d     2  b681299668        NaN        NaN           NaN   \n",
       "...           ...   ...         ...        ...        ...           ...   \n",
       "31863  121d8470d2     2  3daeabd2ce        3.0        NaN    2018-03-30   \n",
       "31864  e41c62189d     2  8b40ec9ab7        NaN        NaN           NaN   \n",
       "31865  01d16f7678     2  e686890dbc        NaN        NaN    2018-03-29   \n",
       "31866  83fc55d93b     2  9d3465eacc        NaN        NaN    2018-03-29   \n",
       "31867  c1b1a4b058     2  65c76167e3        NaN        NaN    2018-03-31   \n",
       "\n",
       "      deactivate_date  \n",
       "0                 NaN  \n",
       "1                 NaN  \n",
       "2                 NaN  \n",
       "3                 NaN  \n",
       "4                 NaN  \n",
       "...               ...  \n",
       "31863             NaN  \n",
       "31864             NaN  \n",
       "31865             NaN  \n",
       "31866             NaN  \n",
       "31867             NaN  \n",
       "\n",
       "[31868 rows x 7 columns]"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "skus"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "73b8aa6dfe3b2802",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T13:19:43.258347Z",
     "start_time": "2024-03-12T13:19:43.226503200Z"
    }
   },
   "outputs": [],
   "source": [
    "# Left join on sku_ID: every row of skus is kept and gains the per-SKU click/order metrics.\n",
    "# NOTE(review): the saved outputs show 31868 skus rows vs 31867 sku_metrics rows, yet the\n",
    "# merged is_click column still prints as integers -- possibly a repeated sku_ID in skus; verify.\n",
    "skus_clor_ratio = skus.merge(sku_metrics, how='left', on='sku_ID')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "72e4e7cb1feea73b",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T13:19:51.084774300Z",
     "start_time": "2024-03-12T13:19:51.066973200Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>type</th>\n",
       "      <th>brand_ID</th>\n",
       "      <th>attribute1</th>\n",
       "      <th>attribute2</th>\n",
       "      <th>activate_date</th>\n",
       "      <th>deactivate_date</th>\n",
       "      <th>is_click</th>\n",
       "      <th>is_order</th>\n",
       "      <th>order_click_ratio</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>a234e08c57</td>\n",
       "      <td>1</td>\n",
       "      <td>c3ab4bf4d9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>60.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>644</td>\n",
       "      <td>74</td>\n",
       "      <td>0.114907</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6449e1fd87</td>\n",
       "      <td>1</td>\n",
       "      <td>1d8b4b4c63</td>\n",
       "      <td>2.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>871</td>\n",
       "      <td>123</td>\n",
       "      <td>0.141217</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>2</td>\n",
       "      <td>eb7d2a675a</td>\n",
       "      <td>3.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4860</td>\n",
       "      <td>699</td>\n",
       "      <td>0.143827</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>acad9fed04</td>\n",
       "      <td>2</td>\n",
       "      <td>9b0d3a5fc6</td>\n",
       "      <td>3.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5298</td>\n",
       "      <td>222</td>\n",
       "      <td>0.041903</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2fa77e3b4d</td>\n",
       "      <td>2</td>\n",
       "      <td>b681299668</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2360</td>\n",
       "      <td>139</td>\n",
       "      <td>0.058898</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31863</th>\n",
       "      <td>121d8470d2</td>\n",
       "      <td>2</td>\n",
       "      <td>3daeabd2ce</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31864</th>\n",
       "      <td>e41c62189d</td>\n",
       "      <td>2</td>\n",
       "      <td>8b40ec9ab7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31865</th>\n",
       "      <td>01d16f7678</td>\n",
       "      <td>2</td>\n",
       "      <td>e686890dbc</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-29</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31866</th>\n",
       "      <td>83fc55d93b</td>\n",
       "      <td>2</td>\n",
       "      <td>9d3465eacc</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-29</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31867</th>\n",
       "      <td>c1b1a4b058</td>\n",
       "      <td>2</td>\n",
       "      <td>65c76167e3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>31868 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           sku_ID  type    brand_ID attribute1 attribute2 activate_date  \\\n",
       "0      a234e08c57     1  c3ab4bf4d9        3.0       60.0           NaN   \n",
       "1      6449e1fd87     1  1d8b4b4c63        2.0       50.0           NaN   \n",
       "2      09b70fcd83     2  eb7d2a675a        3.0       70.0           NaN   \n",
       "3      acad9fed04     2  9b0d3a5fc6        3.0       70.0           NaN   \n",
       "4      2fa77e3b4d     2  b681299668        NaN        NaN           NaN   \n",
       "...           ...   ...         ...        ...        ...           ...   \n",
       "31863  121d8470d2     2  3daeabd2ce        3.0        NaN    2018-03-30   \n",
       "31864  e41c62189d     2  8b40ec9ab7        NaN        NaN           NaN   \n",
       "31865  01d16f7678     2  e686890dbc        NaN        NaN    2018-03-29   \n",
       "31866  83fc55d93b     2  9d3465eacc        NaN        NaN    2018-03-29   \n",
       "31867  c1b1a4b058     2  65c76167e3        NaN        NaN    2018-03-31   \n",
       "\n",
       "      deactivate_date  is_click  is_order  order_click_ratio  \n",
       "0                 NaN       644        74           0.114907  \n",
       "1                 NaN       871       123           0.141217  \n",
       "2                 NaN      4860       699           0.143827  \n",
       "3                 NaN      5298       222           0.041903  \n",
       "4                 NaN      2360       139           0.058898  \n",
       "...               ...       ...       ...                ...  \n",
       "31863             NaN         2         0           0.000000  \n",
       "31864             NaN         1         0           0.000000  \n",
       "31865             NaN         1         0           0.000000  \n",
       "31866             NaN         1         0           0.000000  \n",
       "31867             NaN         1         0           0.000000  \n",
       "\n",
       "[31868 rows x 10 columns]"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "skus_clor_ratio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "30cf4a2d69b05147",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T13:22:56.842061500Z",
     "start_time": "2024-03-12T13:22:56.827028400Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['c3ab4bf4d9', '1d8b4b4c63', 'eb7d2a675a', ..., 'cd87b38171',\n",
       "       '07b69925c8', '65c76167e3'], dtype=object)"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "skus_clor_ratio['brand_ID'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "3840110b4fedd064",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T13:34:55.271934300Z",
     "start_time": "2024-03-12T13:34:55.247704700Z"
    }
   },
   "outputs": [],
   "source": [
    "# Keep the useful attributes (those without massive missing values), plus the click\n",
    "# count and the click-to-order conversion rate.\n",
    "new_features = ['type', 'attribute1', 'attribute2', 'is_click', 'order_click_ratio']\n",
    "# .copy() makes sku_clor_raw an independent frame, so later edits to it can neither\n",
    "# touch skus_clor_ratio nor raise SettingWithCopyWarning.\n",
    "sku_clor_raw = skus_clor_ratio[new_features].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "62e5a3131a0199a4",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T13:35:13.365579700Z",
     "start_time": "2024-03-12T13:35:13.334499400Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>type</th>\n",
       "      <th>attribute1</th>\n",
       "      <th>attribute2</th>\n",
       "      <th>is_click</th>\n",
       "      <th>order_click_ratio</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>3.0</td>\n",
       "      <td>60.0</td>\n",
       "      <td>644</td>\n",
       "      <td>0.114907</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>871</td>\n",
       "      <td>0.141217</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>4860</td>\n",
       "      <td>0.143827</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>5298</td>\n",
       "      <td>0.041903</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2360</td>\n",
       "      <td>0.058898</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31863</th>\n",
       "      <td>2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31864</th>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31865</th>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31866</th>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31867</th>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>31868 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       type attribute1 attribute2  is_click  order_click_ratio\n",
       "0         1        3.0       60.0       644           0.114907\n",
       "1         1        2.0       50.0       871           0.141217\n",
       "2         2        3.0       70.0      4860           0.143827\n",
       "3         2        3.0       70.0      5298           0.041903\n",
       "4         2        NaN        NaN      2360           0.058898\n",
       "...     ...        ...        ...       ...                ...\n",
       "31863     2        3.0        NaN         2           0.000000\n",
       "31864     2        NaN        NaN         1           0.000000\n",
       "31865     2        NaN        NaN         1           0.000000\n",
       "31866     2        NaN        NaN         1           0.000000\n",
       "31867     2        NaN        NaN         1           0.000000\n",
       "\n",
       "[31868 rows x 5 columns]"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sku_clor_raw"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c06ae7707c43d3b",
   "metadata": {},
   "source": [
    "# task 2 预测某次点击是否下单"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "4c90aca71084a3a",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:42:29.184800900Z",
     "start_time": "2024-03-12T12:42:21.659364Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sku_ID</th>\n",
       "      <th>user_ID</th>\n",
       "      <th>request_time</th>\n",
       "      <th>channel</th>\n",
       "      <th>order_ID</th>\n",
       "      <th>order_date</th>\n",
       "      <th>order_time</th>\n",
       "      <th>quantity</th>\n",
       "      <th>type</th>\n",
       "      <th>promise</th>\n",
       "      <th>...</th>\n",
       "      <th>first_order_month</th>\n",
       "      <th>plus</th>\n",
       "      <th>gender</th>\n",
       "      <th>age</th>\n",
       "      <th>marital_status</th>\n",
       "      <th>education</th>\n",
       "      <th>city_level</th>\n",
       "      <th>purchase_power</th>\n",
       "      <th>is_click</th>\n",
       "      <th>is_order</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>a234e08c57</td>\n",
       "      <td>4c3d6d10c2</td>\n",
       "      <td>2018-03-01 23:57:53</td>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6449e1fd87</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-01 16:13:48</td>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>2791ec4485</td>\n",
       "      <td>2018-03-01 22:10:51</td>\n",
       "      <td>wechat</td>\n",
       "      <td>e4874e2a00</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>2018-03-01 14:08:33.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>2018-02</td>\n",
       "      <td>0.0</td>\n",
       "      <td>F</td>\n",
       "      <td>26-35</td>\n",
       "      <td>M</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>eb0718c1c9</td>\n",
       "      <td>2018-03-01 16:34:08</td>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>09b70fcd83</td>\n",
       "      <td>59f84cf342</td>\n",
       "      <td>2018-03-01 22:20:35</td>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20321452</th>\n",
       "      <td>a8a96e022a</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 21:45:07</td>\n",
       "      <td>others</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20321453</th>\n",
       "      <td>eb3f2d2fd8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 11:31:11</td>\n",
       "      <td>others</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20321454</th>\n",
       "      <td>fbce41fd82</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 11:31:08</td>\n",
       "      <td>others</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20321455</th>\n",
       "      <td>fbce41fd82</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 19:28:25</td>\n",
       "      <td>others</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20321456</th>\n",
       "      <td>87b853b910</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-31 06:29:47</td>\n",
       "      <td>others</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>20321457 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              sku_ID     user_ID         request_time channel    order_ID  \\\n",
       "0         a234e08c57  4c3d6d10c2  2018-03-01 23:57:53  wechat         NaN   \n",
       "1         6449e1fd87         NaN  2018-03-01 16:13:48  wechat         NaN   \n",
       "2         09b70fcd83  2791ec4485  2018-03-01 22:10:51  wechat  e4874e2a00   \n",
       "3         09b70fcd83  eb0718c1c9  2018-03-01 16:34:08  wechat         NaN   \n",
       "4         09b70fcd83  59f84cf342  2018-03-01 22:20:35  wechat         NaN   \n",
       "...              ...         ...                  ...     ...         ...   \n",
       "20321452  a8a96e022a         NaN  2018-03-31 21:45:07  others         NaN   \n",
       "20321453  eb3f2d2fd8         NaN  2018-03-31 11:31:11  others         NaN   \n",
       "20321454  fbce41fd82         NaN  2018-03-31 11:31:08  others         NaN   \n",
       "20321455  fbce41fd82         NaN  2018-03-31 19:28:25  others         NaN   \n",
       "20321456  87b853b910         NaN  2018-03-31 06:29:47  others         NaN   \n",
       "\n",
       "          order_date             order_time  quantity  type promise  ...  \\\n",
       "0                NaN                    NaN       NaN   NaN     NaN  ...   \n",
       "1                NaN                    NaN       NaN   NaN     NaN  ...   \n",
       "2         2018-03-01  2018-03-01 14:08:33.0       1.0   2.0     NaN  ...   \n",
       "3                NaN                    NaN       NaN   NaN     NaN  ...   \n",
       "4                NaN                    NaN       NaN   NaN     NaN  ...   \n",
       "...              ...                    ...       ...   ...     ...  ...   \n",
       "20321452         NaN                    NaN       NaN   NaN     NaN  ...   \n",
       "20321453         NaN                    NaN       NaN   NaN     NaN  ...   \n",
       "20321454         NaN                    NaN       NaN   NaN     NaN  ...   \n",
       "20321455         NaN                    NaN       NaN   NaN     NaN  ...   \n",
       "20321456         NaN                    NaN       NaN   NaN     NaN  ...   \n",
       "\n",
       "          first_order_month  plus  gender    age  marital_status  education  \\\n",
       "0                       NaN   NaN     NaN    NaN             NaN        NaN   \n",
       "1                       NaN   NaN     NaN    NaN             NaN        NaN   \n",
       "2                   2018-02   0.0       F  26-35               M        2.0   \n",
       "3                       NaN   NaN     NaN    NaN             NaN        NaN   \n",
       "4                       NaN   NaN     NaN    NaN             NaN        NaN   \n",
       "...                     ...   ...     ...    ...             ...        ...   \n",
       "20321452                NaN   NaN     NaN    NaN             NaN        NaN   \n",
       "20321453                NaN   NaN     NaN    NaN             NaN        NaN   \n",
       "20321454                NaN   NaN     NaN    NaN             NaN        NaN   \n",
       "20321455                NaN   NaN     NaN    NaN             NaN        NaN   \n",
       "20321456                NaN   NaN     NaN    NaN             NaN        NaN   \n",
       "\n",
       "          city_level  purchase_power is_click  is_order  \n",
       "0                NaN             NaN        1         0  \n",
       "1                NaN             NaN        1         0  \n",
       "2                3.0             3.0        1         1  \n",
       "3                NaN             NaN        1         0  \n",
       "4                NaN             NaN        1         0  \n",
       "...              ...             ...      ...       ...  \n",
       "20321452         NaN             NaN        1         0  \n",
       "20321453         NaN             NaN        1         0  \n",
       "20321454         NaN             NaN        1         0  \n",
       "20321455         NaN             NaN        1         0  \n",
       "20321456         NaN             NaN        1         0  \n",
       "\n",
       "[20321457 rows x 28 columns]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clor_us_out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "c62e6e8a3fcecdfa",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:42:29.225987700Z",
     "start_time": "2024-03-12T12:42:29.185806700Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['sku_ID', 'user_ID', 'request_time', 'channel', 'order_ID',\n",
       "       'order_date', 'order_time', 'quantity', 'type', 'promise',\n",
       "       'original_unit_price', 'final_unit_price', 'direct_discount_per_unit',\n",
       "       'quantity_discount_per_unit', 'bundle_discount_per_unit',\n",
       "       'coupon_discount_per_unit', 'gift_item', 'user_level',\n",
       "       'first_order_month', 'plus', 'gender', 'age', 'marital_status',\n",
       "       'education', 'city_level', 'purchase_power', 'is_click', 'is_order'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clor_us_out.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "ef6736eaab2288f7",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:42:29.250071300Z",
     "start_time": "2024-03-12T12:42:29.201815800Z"
    }
   },
   "outputs": [],
   "source": [
    "# Categorical attributes\n",
    "categorical_features = ['channel', 'type', 'promise', 'user_level', 'plus', 'gender', 'marital_status', 'education']\n",
    "\n",
    "# Discarded attributes: the identifier columns plus 'is_click'\n",
    "ID_features = ['sku_ID', 'user_ID', 'order_ID']\n",
    "abandon_features = ID_features + ['is_click']\n",
    "\n",
    "# Remaining attributes, kept in their original column order. Despite the name this is a\n",
    "# catch-all: it also holds the date/time columns and the string-valued 'age' bucket.\n",
    "excluded_features = set(abandon_features) | set(categorical_features)\n",
    "numerical_features = [col for col in clor_us_out.columns if col not in excluded_features]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "d967f0d8f8414a8d",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:42:29.294574400Z",
     "start_time": "2024-03-12T12:42:29.217304500Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['request_time',\n",
       " 'order_date',\n",
       " 'order_time',\n",
       " 'quantity',\n",
       " 'original_unit_price',\n",
       " 'final_unit_price',\n",
       " 'direct_discount_per_unit',\n",
       " 'quantity_discount_per_unit',\n",
       " 'bundle_discount_per_unit',\n",
       " 'coupon_discount_per_unit',\n",
       " 'gift_item',\n",
       " 'first_order_month',\n",
       " 'age',\n",
       " 'city_level',\n",
       " 'purchase_power',\n",
       " 'is_order']"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "numerical_features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "abe039c3fbb128db",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:42:34.263872600Z",
     "start_time": "2024-03-12T12:42:29.231998600Z"
    }
   },
   "outputs": [],
   "source": [
    "# Keep the categorical columns first, followed by the remaining feature columns\n",
    "selected_columns = categorical_features + numerical_features\n",
    "combine_raw = clor_us_out[selected_columns]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "99e96a19680e5988",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:42:34.281267800Z",
     "start_time": "2024-03-12T12:42:34.263872600Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>channel</th>\n",
       "      <th>type</th>\n",
       "      <th>promise</th>\n",
       "      <th>user_level</th>\n",
       "      <th>plus</th>\n",
       "      <th>gender</th>\n",
       "      <th>marital_status</th>\n",
       "      <th>education</th>\n",
       "      <th>request_time</th>\n",
       "      <th>order_date</th>\n",
       "      <th>...</th>\n",
       "      <th>direct_discount_per_unit</th>\n",
       "      <th>quantity_discount_per_unit</th>\n",
       "      <th>bundle_discount_per_unit</th>\n",
       "      <th>coupon_discount_per_unit</th>\n",
       "      <th>gift_item</th>\n",
       "      <th>first_order_month</th>\n",
       "      <th>age</th>\n",
       "      <th>city_level</th>\n",
       "      <th>purchase_power</th>\n",
       "      <th>is_order</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-01 23:57:53</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-01 16:13:48</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>wechat</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>F</td>\n",
       "      <td>M</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2018-03-01 22:10:51</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>...</td>\n",
       "      <td>39.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2018-02</td>\n",
       "      <td>26-35</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-01 16:34:08</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>wechat</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2018-03-01 22:20:35</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  channel  type promise  user_level  plus gender marital_status  education  \\\n",
       "0  wechat   NaN     NaN         NaN   NaN    NaN            NaN        NaN   \n",
       "1  wechat   NaN     NaN         NaN   NaN    NaN            NaN        NaN   \n",
       "2  wechat   2.0     NaN         1.0   0.0      F              M        2.0   \n",
       "3  wechat   NaN     NaN         NaN   NaN    NaN            NaN        NaN   \n",
       "4  wechat   NaN     NaN         NaN   NaN    NaN            NaN        NaN   \n",
       "\n",
       "          request_time  order_date  ... direct_discount_per_unit  \\\n",
       "0  2018-03-01 23:57:53         NaN  ...                      NaN   \n",
       "1  2018-03-01 16:13:48         NaN  ...                      NaN   \n",
       "2  2018-03-01 22:10:51  2018-03-01  ...                     39.0   \n",
       "3  2018-03-01 16:34:08         NaN  ...                      NaN   \n",
       "4  2018-03-01 22:20:35         NaN  ...                      NaN   \n",
       "\n",
       "   quantity_discount_per_unit  bundle_discount_per_unit  \\\n",
       "0                         NaN                       NaN   \n",
       "1                         NaN                       NaN   \n",
       "2                         0.0                       0.0   \n",
       "3                         NaN                       NaN   \n",
       "4                         NaN                       NaN   \n",
       "\n",
       "   coupon_discount_per_unit  gift_item  first_order_month    age  city_level  \\\n",
       "0                       NaN        NaN                NaN    NaN         NaN   \n",
       "1                       NaN        NaN                NaN    NaN         NaN   \n",
       "2                       0.0        0.0            2018-02  26-35         3.0   \n",
       "3                       NaN        NaN                NaN    NaN         NaN   \n",
       "4                       NaN        NaN                NaN    NaN         NaN   \n",
       "\n",
       "   purchase_power is_order  \n",
       "0             NaN        0  \n",
       "1             NaN        0  \n",
       "2             3.0        1  \n",
       "3             NaN        0  \n",
       "4             NaN        0  \n",
       "\n",
       "[5 rows x 24 columns]"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "combine_raw.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "b74568894d24f8d4",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:43:11.018544800Z",
     "start_time": "2024-03-12T12:42:34.278624100Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "channel 缺失个数： 0 \t占比为： 0.0 %\n",
      "type 缺失个数： 17685955 \t占比为： 87.03093975988041 %\n",
      "promise 缺失个数： 18538976 \t占比为： 91.22857676986449 %\n",
      "user_level 缺失个数： 14026962 \t占比为： 69.02537549349931 %\n",
      "plus 缺失个数： 14026962 \t占比为： 69.02537549349931 %\n",
      "gender 缺失个数： 14026962 \t占比为： 69.02537549349931 %\n",
      "marital_status 缺失个数： 14026962 \t占比为： 69.02537549349931 %\n",
      "education 缺失个数： 14026962 \t占比为： 69.02537549349931 %\n",
      "request_time 缺失个数： 0 \t占比为： 0.0 %\n",
      "order_date 缺失个数： 17685955 \t占比为： 87.03093975988041 %\n",
      "order_time 缺失个数： 17685955 \t占比为： 87.03093975988041 %\n",
      "quantity 缺失个数： 17685955 \t占比为： 87.03093975988041 %\n",
      "original_unit_price 缺失个数： 17685955 \t占比为： 87.03093975988041 %\n",
      "final_unit_price 缺失个数： 17685955 \t占比为： 87.03093975988041 %\n",
      "direct_discount_per_unit 缺失个数： 17685955 \t占比为： 87.03093975988041 %\n",
      "quantity_discount_per_unit 缺失个数： 17685955 \t占比为： 87.03093975988041 %\n",
      "bundle_discount_per_unit 缺失个数： 17685955 \t占比为： 87.03093975988041 %\n",
      "coupon_discount_per_unit 缺失个数： 17685955 \t占比为： 87.03093975988041 %\n",
      "gift_item 缺失个数： 17685955 \t占比为： 87.03093975988041 %\n",
      "first_order_month 缺失个数： 14026962 \t占比为： 69.02537549349931 %\n",
      "age 缺失个数： 14026962 \t占比为： 69.02537549349931 %\n",
      "city_level 缺失个数： 14026962 \t占比为： 69.02537549349931 %\n",
      "purchase_power 缺失个数： 14026962 \t占比为： 69.02537549349931 %\n",
      "is_order 缺失个数： 0 \t占比为： 0.0 %\n"
     ]
    }
   ],
   "source": [
    "# Print, for every column, the number of missing values and their share of all rows.\n",
    "# Summing the boolean null mask avoids building a filtered copy of the whole frame\n",
    "# for each column just to measure its length.\n",
    "total_rows = len(combine_raw)  # loop-invariant, so computed once\n",
    "for column in combine_raw.columns:\n",
    "    # int() keeps the printed values plain Python numbers, as before\n",
    "    missing_count = int(combine_raw[column].isnull().sum())\n",
    "    print(column, '缺失个数：', missing_count, '\\t占比为：', missing_count / total_rows * 100, '%')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "57978668e1c0f13f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-12T12:43:11.044151500Z",
     "start_time": "2024-03-12T12:43:11.018544800Z"
    }
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
